Example #1
def multiplexity_eval_metrics(sample: MultiLayerSplit,
                              pred_edges: pd.DataFrame) -> Dict[str, float]:
    assert len(sample.layer_ids) == 2, 'binary case only yet'
    layer_a, layer_b = sample.layer_ids

    orig_mltplx = multiplexity(sample.full.edges,
                               layer_a,
                               layer_b,
                               reciprocal=False)
    orig_mltrcp = multiplexity(sample.full.edges,
                               layer_a,
                               layer_b,
                               reciprocal=True)
    pred_mltplx = multiplexity(pred_edges, layer_a, layer_b, reciprocal=False)
    pred_mltrcp = multiplexity(pred_edges, layer_a, layer_b, reciprocal=True)

    res = dict(mltplx_tgt=orig_mltplx,
               mltplx_pred=pred_mltplx,
               mltplx_mae=mean_absolute_error([orig_mltplx], [pred_mltplx]),
               mltplx_mape=mean_absolute_percentage_error([orig_mltplx],
                                                          [pred_mltplx]),
               mltrcp_mae=mean_absolute_error([orig_mltrcp], [pred_mltrcp]),
               mltrcp_mape=mean_absolute_percentage_error([orig_mltrcp],
                                                          [pred_mltrcp]))
    return res
Example #2
def check_constraints(sample, probability_matrix):
    """
    Compute deviation of s_in/s_out from expected values.
    Return dict with MAE and weighted MAPE.
    """
    s_in, s_out = empirical_strengths(sample.full.edges,
                                      sample.full.nodes,
                                      marginalized=True)
    _s_in = probability_matrix.sum(axis=1)
    _s_out = probability_matrix.sum(axis=0)
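    # _s_in/_s_out are the expected strengths implied by the probability matrix (marginal sums)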
    corr_in, pv_in = pearsonr(s_in, _s_in)
    corr_out, pv_out = pearsonr(s_out, _s_out)
    spcorr_in, sp_pv_in = spearmanr(s_in, _s_in)
    spcorr_out, sp_pv_out = spearmanr(s_out, _s_out)
    return dict(
        s_in_mae=mean_absolute_error(s_in, _s_in),
        s_out_mae=mean_absolute_error(s_out, _s_out),
        s_in_mape=mean_absolute_percentage_error(s_in, _s_in),
        s_out_mape=mean_absolute_percentage_error(s_out, _s_out),
        # r2_in=r2_score(s_in, _s_in),
        # r2_out=r2_score(s_out, _s_out),
        s_in_js=jensenshannon(s_in, _s_in, base=2)**2,
        s_out_js=jensenshannon(s_out, _s_out, base=2)**2,
        corr_in=corr_in,
        corr_out=corr_out,
        spcorr_in=spcorr_in,
        spcorr_out=spcorr_out,
        pv_in=pv_in,
        pv_out=pv_out,
        sp_pv_in=sp_pv_in,
        sp_pv_out=sp_pv_out,
    )
Example #3
def run_regression(train_embeds,
                   train_targets,
                   test_embeds,
                   test_targets,
                   scaler=None):

    print('Ravelling train targets...')
    train_targets_ravelled = train_targets.ravel()

    print('Fitting squared loss Regressor...')
    print(np.shape(train_embeds), np.shape(train_targets_ravelled))
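    # NOTE: loss="squared_loss" is the older scikit-learn name; newer releases use loss="squared_error"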
    regressor = SGDRegressor(loss="squared_loss", max_iter=1000, tol=1e-3)
    regressor.fit(train_embeds, train_targets_ravelled)

    print('Predicting outputs...')
    tr_res = []
    if scaler is None:
        tr_res.append(
            mean_squared_error(train_targets,
                               regressor.predict(train_embeds),
                               squared=False))
    else:
        rmse = mean_squared_error(train_targets,
                                  regressor.predict(train_embeds),
                                  squared=False)
        tr_res.append(
            scaler.inverse_transform(np.array(rmse).reshape(1, -1))[0][0])
    tr_res.append(
        mean_absolute_percentage_error(train_targets,
                                       regressor.predict(train_embeds)))
    tr_res.append(r2_score(train_targets, regressor.predict(train_embeds)))
    tr_res.append(
        explained_variance_score(train_targets,
                                 regressor.predict(train_embeds)))

    ts_res = []
    if scaler is None:
        ts_res.append(
            mean_squared_error(test_targets,
                               regressor.predict(test_embeds),
                               squared=False))
    else:
        rmse = mean_squared_error(test_targets,
                                  regressor.predict(test_embeds),
                                  squared=False)
        ts_res.append(
            scaler.inverse_transform(np.array(rmse).reshape(1, -1))[0][0])
    ts_res.append(
        mean_absolute_percentage_error(test_targets,
                                       regressor.predict(test_embeds)))
    ts_res.append(r2_score(test_targets, regressor.predict(test_embeds)))
    ts_res.append(
        explained_variance_score(test_targets, regressor.predict(test_embeds)))

    return tr_res, ts_res
Example #4
def test_deprecation_positional_arguments_mape():
    y_true = [1, 1, 1]
    y_pred = [1, 0, 1]
    sample_weights = [0.5, 0.1, 0.2]
    multioutput = "raw_values"

    warning_msg = "passing these as positional arguments will result in an error"

    # Trigger the warning
    with pytest.warns(FutureWarning, match=warning_msg):
        mean_absolute_percentage_error(y_true, y_pred, sample_weights,
                                       multioutput)
Example #5
def binary_classification_metrics(target: Iterable[Edge],
                                  pred: Iterable[Edge]):
    target = set(target)
    pred = set(pred)
    num_expected = len(target)
    num_predicted = len(pred)
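    # L_mape: relative error between the predicted and expected edge counts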
    L_mape = mean_absolute_percentage_error([num_expected], [num_predicted])

    tp = len(target & pred)
    fp = len(pred - target)
    fn = len(target - pred)
    # tn = n * n - (tp + fp + fn)

    if num_predicted != 0:
        precision = tp / (tp + fp)
    else:
        precision = 1

    if num_expected != 0:
        recall = tp / (tp + fn)
    else:
        recall = 1

    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0
    return dict(precision=precision,
                recall=recall,
                f1=f1,
                num_expected=num_expected,
                num_predicted=num_predicted,
                L_mape=L_mape)
Example #6
def objective(trial):
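    # `params`, `data` (training DMatrix) and `X_test` (evaluation DMatrix) are assumed to be defined in the enclosing scope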

    param = params.copy()

    param.update({
        'eta':
        trial.suggest_loguniform('eta', 0.01, 1),
        'lambda':
        trial.suggest_loguniform('lambda', 1e-4, 1),
        'alpha':
        trial.suggest_loguniform('alpha', 1e-4, 1.0),
        'gamma':
        trial.suggest_categorical('gamma', [0, 1, 5, 20, 70]),
        'max_depth':
        trial.suggest_int('max_depth', 2, 5),
        'min_child_weight':
        trial.suggest_categorical('min_child_weight', [0.5, 1, 5, 8, 20, 50]),
        'subsample':
        trial.suggest_loguniform('subsample', 1e-4, 1),
        'colsample_bytree':
        trial.suggest_loguniform('colsample_bytree', 1e-4, 1),
    })

    model = xgb.train(param,
                      data,
                      num_boost_round=50000,
                      verbose_eval=False,
                      evals=[(X_test, "Test")],
                      early_stopping_rounds=500)

    err = mean_absolute_percentage_error(X_test.get_label(),
                                         model.predict(X_test))

    return err
Example #7
def metrics_stat(y_true: np.ndarray, y_pred: np.ndarray) -> typing.Dict[str, float]:
    mape = mean_absolute_percentage_error(y_true, y_pred)
    mdape = median_absolute_percentage_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    r2 = r2_score(y_true, y_pred)
    raif_metric = deviation_metric(y_true, y_pred)
    return {'mape': mape, 'mdape': mdape, 'rmse': rmse, 'r2': r2, 'raif_metric': raif_metric}
Example #8
    def update_model(self, X: np.ndarray, y: np.ndarray):
        # NOTE: the GPR model is created anew here since the effective search space
        # (the reduced space) is dynamic
        dim = self._search_space.dim
        bounds = np.asarray(self._search_space.bounds)
        self.model = GaussianProcess(
            mean=trend.constant_trend(dim),
            corr="matern",
            thetaL=1e-3 * (bounds[:, 1] - bounds[:, 0]),
            thetaU=1e3 * (bounds[:, 1] - bounds[:, 0]),
            nugget=1e-6,
            noise_estim=False,
            optimizer="BFGS",
            wait_iter=3,
            random_start=max(10, dim),
            likelihood="concentrated",
            eval_budget=100 * dim,
        )

        _std = np.std(y)
        y_ = y if np.isclose(_std, 0) else (y - np.mean(y)) / _std

        self.fmin, self.fmax = np.min(y_), np.max(y_)
        self.frange = self.fmax - self.fmin

        self.model.fit(X, y_)
        y_hat = self.model.predict(X)

        r2 = r2_score(y_, y_hat)
        MAPE = mean_absolute_percentage_error(y_, y_hat)
        self.logger.info(f"model r2: {r2}, MAPE: {MAPE}")
Example #9
def create_accuracy_plot_and_return_mape_and_cumulative_prod(result_df):
    prediction_array = result_df[
        '2-Year Cumulative Production (bbl)'].to_numpy() / 1000
    solution_array = result_df['Total_Prod_Well'].to_numpy() / 1000
    plt.figure(figsize=(6, 4))
    plt.plot(solution_array, prediction_array, 'o', label='Estimates')
    plt.plot([0, 1500], [0, 1500], '--r', label='1:1 line')
    plt.plot([solution_array[0], solution_array[0]],
             [prediction_array[0], solution_array[0]],
             '--',
             color='gray',
             label='misfit')
    for i in range(1, 3):
        plt.plot([solution_array[i], solution_array[i]],
                 [prediction_array[i], solution_array[i]],
                 '--',
                 color='gray')
    plt.xlabel('True, MSTB')
    plt.ylabel('Prediction, MSTB')
    plt.grid('on')
    plt.legend()
    plt.axis([0, 1500, 0, 1500])
    plt.savefig('accuracy.pgf')
    mape = np.round(
        mean_absolute_percentage_error(solution_array, prediction_array), 5)
    total_prod = result_df['Total_Prod_Field'].to_numpy()[0]
    return mape, total_prod
Example #10
    def update_model(self):
        # TODO: implement a proper model selection here
        # TODO: in case r2 is really poor, re-fit the model or log-transform `fitness`?
        data = self.data
        fitness = data.fitness

        # Standardize the response values to prevent the numerical overflow that might
        # appear in the MGF-based acquisition function.
        # Standardization should make it easier to specify the GP prior, compared to
        # rescaling values to the unit interval.
        _std = np.std(fitness)
        if len(fitness) > 5 and np.isclose(_std, 0):
            raise FlatFitnessError()

        fitness_ = fitness if np.isclose(
            _std, 0) else (fitness - np.mean(fitness)) / np.std(fitness)
        self.fmin, self.fmax = np.min(fitness_), np.max(fitness_)
        self.frange = self.fmax - self.fmin

        self.model.fit(data, fitness_.reshape(-1, 1))
        fitness_hat = self.model.predict(data)

        r2 = r2_score(fitness_, fitness_hat)
        MAPE = mean_absolute_percentage_error(fitness_, fitness_hat)
        self.logger.info(f"model r2: {r2}, MAPE: {MAPE}")
Example #11
    def run(self, duration, name, interval, plot=False, record=True, verbose=True):
        begin = time.time()
        self._warmStart()
        self.cur = self.startPont
        acceptable = False

        # streaming
        while time.time() - begin < duration and self.nxt < len(self.yTrue):
            time.sleep(interval)
            self._update()
            self._predict()

            # evaluate model
            if self.lazy and self.cur > 12:
                acceptable = self._evaluate('mape', 12, 0.1)
                if not acceptable:
                    self._learnMany(numberOfData=12)
            else:
                self._learnOne()

        if plot:
            plotlyplot(actual=self.actualList, prediction=self.predList,
                       times=self.times[:self.cur - 1], plotname=name)

        if record:
            records = {'actual': self.actualList, 'predict': self.predList, 'time': self.timeline}
            df = pd.DataFrame(records)
            df.to_csv('/Users/cicada/Documents/DTU_resource/Thesis/Incremental-learning-EL/src/results-' + name + '.csv')
            # print(df.head())
            
        if verbose:
            print(mean_absolute_percentage_error(self.actualList[200:], self.predList[200:]))
Example #12
def eval_model(model, pred_step, counts):
    model = model[::pred_step]
    counts = counts[::pred_step]

    lbs = [5, 5, 5, 5, 5, 5, 5, 5, 5]
    ubs = [5, 5, 5, 5, 5, 5, 5, 5, 5]
    conf = 0.95

    print("Total Values: {}".format(min(len(model), len(counts))))
    print("Forecast: {}".format(model))
    print("Validation Set: {}".format(counts))
    print()

    # metrics follow the (y_true, y_pred) convention: `counts` is the validation data, `model` the forecast
    mae = mean_absolute_error(counts, model)
    rmse = math.sqrt(mean_squared_error(counts, model))
    mape = mean_absolute_percentage_error(counts, model)
    smape = symmetric_mean_absolute_percentage_error(counts, model)
    mase = mean_absolute_scaled_error(counts, model)
    # ias = interval_accuracy_score(counts, lbs, ubs, conf)

    print("Mean Absolute Error: {}".format(mae))
    print("Root Mean Squared Error: {}".format(rmse))
    print("Mean Absolute Percentage Error: {}".format(mape))
    print("Symmetric Mean Absolute Percentage Error: {}".format(smape))
    print("Mean Absolute Scaled Error: {}".format(mase))
    # print("Mean Interval Accuracy Score: {}".format(ias))

    return rmse
Example #13
def main():

    estimators = [('lr', Ridge())]

    # alternative estimators kept as a backup; not used in this run
    backup = [('gb', GradientBoostingRegressor()),
              ('rf', RandomForestRegressor())]

    run = TrainPredictDuration('resource/config.ini', 'model')
    subscr_type = run.subscr_type
    df = run.data_preparation()
    df = run.feature_engineering(df)
    X, y, feature_names = run.feature_selection(df)
    print(feature_names)

    for estimator in estimators:
        model_name = estimator[0] + '_{}'.format(subscr_type)
        model, y_pred, y_test = run.grid_search_train(X, y, estimator[1],
                                                      model_name)
        print('{}-MSE: '.format(model_name),
              mean_squared_error(y_test, y_pred))
        print('{}-MAPE: '.format(model_name),
              mean_absolute_percentage_error(y_test, y_pred))
        print('{}-R2: '.format(model_name), r2_score(y_test, y_pred))
        run.save_model(model, '{}.sav'.format(model_name))
        run.save_output(y_pred, y_test, '{}.json'.format(model_name))

        if model_name == 'lr' + '_{}'.format(subscr_type):
            for idx, value in enumerate(feature_names):
                print(value + ': ', round(model.coef_[idx], 5))
Example #14
def plotHoltWinters(series, plot_intervals=False, plot_anomalies=False):
    """
        series - dataset with timeseries
        plot_intervals - show confidence intervals
        plot_anomalies - show anomalies
        Note: relies on a fitted Holt-Winters `model` instance from the enclosing scope.
    """
    
    plt.figure(figsize=(20, 10))
    plt.plot(model.result, label = "Model")
    plt.plot(series.values, label = "Actual")
    # sklearn's MAPE returns a fraction, so scale to percent for the title
    error = mean_absolute_percentage_error(series.values, model.result[:len(series)]) * 100
    plt.title("Mean Absolute Percentage Error: {0:.2f}%".format(error))
    
    if plot_anomalies:
        anomalies = np.array([np.NaN]*len(series))
        anomalies[series.values<model.LowerBond[:len(series)]] = \
            series.values[series.values<model.LowerBond[:len(series)]]
        anomalies[series.values>model.UpperBond[:len(series)]] = \
            series.values[series.values>model.UpperBond[:len(series)]]
        plt.plot(anomalies, "o", markersize=10, label = "Anomalies")
    
    if plot_intervals:
        plt.plot(model.UpperBond, "r--", alpha=0.5, label = "Up/Low confidence")
        plt.plot(model.LowerBond, "r--", alpha=0.5)
        plt.fill_between(x=range(0,len(model.result)), y1=model.UpperBond, 
                         y2=model.LowerBond, alpha=0.2, color = "grey")    
        
    plt.vlines(len(series), ymin=min(model.LowerBond), ymax=max(model.UpperBond), linestyles='dashed')
    plt.axvspan(len(series)-20, len(model.result), alpha=0.3, color='lightgrey')
    plt.grid(True)
    plt.axis('tight')
    plt.legend(loc="best", fontsize=13);
Example #15
    def evaluate(self):
        r2 = r2_score(self.test_y, self.y_pred)
        rmse = mean_squared_error(self.test_y, self.y_pred, squared=False)
        mae = mean_absolute_error(self.test_y, self.y_pred)
        mape = mean_absolute_percentage_error(self.test_y, self.y_pred)

        return r2, rmse, mae, mape
Example #16
def calc_metrics(y_true, y_pred):
    """
        This Function calculates all relevant metrics for model evaluation.

        :param y_true: Array-like list of true values.
        :param y_pred: Array-like list of predicted values.
    """
    logger.info("Start calc_metrics()")

    # calc metrics
    mae = met.mean_absolute_error(y_true, y_pred)
    mse = met.mean_squared_error(y_true, y_pred)
    rmse = met.mean_squared_error(y_true, y_pred, squared=False)
    mape = met.mean_absolute_percentage_error(y_true, y_pred)
    r2 = met.r2_score(y_true, y_pred)

    # combine results into dataframe
    metrics_series = [
        mae,
        mse,
        rmse,
        # mape,
        r2
    ]

    # clean small values, so SQL can parse str to numeric
    metrics_series = [round(num, 4) for num in metrics_series]

    return metrics_series
Example #17
def eval_model(predictions, pred_step, actual, verbose=False):
    predictions = predictions[::pred_step]
    actual = actual[::pred_step]

    mae = mean_absolute_error(actual, predictions)
    rmse = math.sqrt(mean_squared_error(actual, predictions))
    # zero-weight the points where the actual value is 0 so the MAPE stays well-behaved
    ignore_zero_values = np.where(np.array(actual) == 0, 0, 1)
    mape = mean_absolute_percentage_error(actual, predictions,
                                          sample_weight=ignore_zero_values)
    smape = symmetric_mean_absolute_percentage_error(actual, predictions)
    mase = mean_absolute_scaled_error(actual, predictions)
    ias = interval_accuracy_score(actual, predictions)
    if verbose:
        print("Total Values: {}".format(min(len(predictions), len(actual))))
        print("Forecast: {}".format(predictions))
        print("Validation Set: {}".format(actual))
        print()
        print("Mean Absolute Error: {}".format(mae))
        print("Root Mean Squared Error: {}".format(rmse))
        print("Mean Absolute Percentage Error: {}".format(mape))
        print("Symmetric Mean Absolute Percentage Error: {}".format(smape))
        print("Mean Absolute Scaled Error: {}".format(mase))
        print("Mean Interval Accuracy Score: {}".format(ias))
    metrics = dict(mae=mae,
                   rmse=rmse,
                   mape=mape,
                   smape=smape,
                   mase=mase,
                   ias=ias)
    return metrics
Example #18
def get_metrics(y_true, y_pred):
    """ Calculates all desired metrics for a given set of predictions and ground truths.
    
    Parameters:
    -----------
    y_true : ndarray, shape [N]
        Ground truth class labels.
    y_pred : ndarray, shape [N]
        Predicted class probabilities or hard labels.
        
    Returns:
    --------
    metrics : defaultdict
        A dict containing values for all metrics. 
    """
    metrics = defaultdict(float)
    #metrics['accuracy'] = accuracy_score(y_true, y_pred >= .5)
    # NOTE: the MAPE value is stored under the 'accuracy' key here
    metrics['accuracy'] = mean_absolute_percentage_error(y_true, y_pred)
    try: 
        metrics['auc'] = roc_auc_score(y_true, y_pred)
    except: 
        # AUC is not defined, if only one class is present 
        metrics['auc'] = np.nan
    metrics['ppr'] = (y_pred >= .5).sum() / y_pred.shape[0]
    return metrics
Example #19
    def evaluate_forecast(self):
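        # `learn` presumably aliases sklearn.metrics (e.g. `from sklearn import metrics as learn`)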
        n = min(len(self.validation_data), len(self.forecasts))
        y_forecast = self.forecasts[:n]
        y_actual = self.validation_data.tail(n)["close"]

        mean_abs_err = learn.mean_absolute_error(y_actual, y_forecast)
        mean_sq_err = learn.mean_squared_error(y_actual, y_forecast)
        mean_sq_lg_err = learn.mean_squared_log_error(y_actual, y_forecast)
        mean_abs_percent_err = learn.mean_absolute_percentage_error(
            y_actual, y_forecast)
        median_abs_err = learn.median_absolute_error(y_actual, y_forecast)
        mean_gamma_dev = learn.mean_gamma_deviance(y_actual, y_forecast)
        mean_poisson_dev = learn.mean_poisson_deviance(y_actual, y_forecast)
        mean_tweedie_dev = learn.mean_tweedie_deviance(y_actual, y_forecast)
        explained_variance = learn.explained_variance_score(
            y_actual, y_forecast)
        max_residual = learn.max_error(y_actual, y_forecast)
        coeff_determination = learn.r2_score(y_actual, y_forecast)

        metrics = {
            "Mean Squared Error (MSE)": mean_sq_err,
            "Mean Absolute Error (MAE)": mean_abs_err,
            "Mean Squared Logarithmic Error (MSLE)": mean_sq_lg_err,
            "Mean Absolute Percentage Error (MAPE)": mean_abs_percent_err,
            "Median Absolute Error (MedAE)": median_abs_err,
            "Mean Gamma Deviance": mean_gamma_dev,
            "Mean Poisson Deviance": mean_poisson_dev,
            "Mean Tweedie Deviance Error": mean_tweedie_dev,
            "Explained Variance Regression Score": explained_variance,
            "Max Residual Error": max_residual,
            "Coefficient of Determination": coeff_determination
        }
        self.metrics = metrics
Example #20
def calc_performance_metrics(
        labels: np.ndarray,
        predictions: np.ndarray,
        decimal_points: Optional[int] = 4) -> _PerformanceMetrics:
    """Calculates performance metrics related to a regression model.

  Args:
    labels: An array of true labels containing numeric values.
    predictions: An array of predictions containing numeric values.
    decimal_points: Number of decimal points to use when outputting the
      calculated performance metrics.

  Returns:
    Object of _PerformanceMetrics class containing the regression diagnostics
      metrics.
  """
    utils.assert_label_and_prediction_length_match(labels, predictions)

    mse = metrics.mean_squared_error(labels, predictions)
    rmse = np.sqrt(mse)
    mae = metrics.mean_absolute_error(labels, predictions)
    mape = metrics.mean_absolute_percentage_error(labels, predictions)
    r2 = metrics.r2_score(labels, predictions)
    corr = sp.stats.pearsonr(labels, predictions)[0]

    return _PerformanceMetrics(
        mean_squared_error=round(mse, decimal_points),
        root_mean_squared_error=round(rmse, decimal_points),
        mean_absolute_error=round(mae, decimal_points),
        mean_absolute_percentage_error=round(mape, decimal_points),
        r_squared=round(r2, decimal_points),
        pearson_correlation=round(corr, decimal_points))
Example #21
def print_score():
    df = get_pure_cases_df()
    X, y = sliding_window(df, 28, 14)
    assert X.shape[0] == y.shape[0]
    X = X.numpy()
    y = y.numpy()
    mae = []
    rmse = []
    mape = []
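    # persistence baseline: the last value of each input window is broadcast as the forecast for the whole horizon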
    for i in range(X.shape[0]):
        mae.append(
            mean_absolute_error(y[i], np.broadcast_to(X[i][-1],
                                                      (y.shape[1], 1))))
        rmse.append(
            mean_squared_error(y[i],
                               np.broadcast_to(X[i][-1], (y.shape[1], 1)),
                               squared=False))
        mape.append(
            mean_absolute_percentage_error(
                y[i], np.broadcast_to(X[i][-1], (y.shape[1], 1))))

    mae = np.array(mae)
    rmse = np.array(rmse)
    mape = np.array(mape)
    print(f"RMSE: {rmse.mean()}, MAE: {mae.mean()}, MAPE: {mape.mean()}")
Example #22
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rmsew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6], squared=False)
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    mapew = mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw2 = explained_variance_score(
        y_true, y_pred, multioutput=[0.4, 0.6], force_finite=False
    )

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(rmsew, 0.59, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(mapew, 0.1668, decimal=2)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)
    assert_almost_equal(evsw2, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(
        np.log(1 + y_true), np.log(1 + y_pred), multioutput=[0.3, 0.7]
    )
    assert_almost_equal(msle, msle2, decimal=2)
Example #23
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_error(y_true, y_pred, squared=False)
    assert_almost_equal(error, 0.454, decimal=2)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. + 2. / 3) / 4.)

    error = np.around(mean_absolute_percentage_error(y_true, y_pred),
                      decimals=2)
    assert np.isfinite(error)
    assert error > 1e6
    error = median_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. + 1.) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
Example #24
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(
        mean_squared_log_error(y_true, y_pred),
        mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    assert np.isfinite(mape)
    assert mape > 1e6
    assert_almost_equal(max_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=0),
                        mean_squared_error(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p>=2 needs positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=-1),
                        5 / 12 * n * (n**2 + 2 * n + 1))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
                        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum())
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))
Example #25
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_error([0.], [0.], squared=False), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_percentage_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([-1.], [-1.])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., 2., 3.], [1., -2., 3.])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., -2., 3.], [1., 2., 3.])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.], power=power),
                    2 / (2 - power), rtol=1e-3)
    with pytest.raises(ValueError,
                       match="can only be used on strictly positive y_pred."):
        mean_tweedie_deviance([0.], [0.], power=power)
    assert_almost_equal(mean_tweedie_deviance([0.], [0.], power=0), 0.00, 2)

    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=1.0)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.], [1.], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)
    power = 2.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00,
                    atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)
    power = 3.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power),
                    0.00, atol=1e-8)

    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)

    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.], [0.], power=0.5)
Example #26
def get_glm_prediction():
    confirmed_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"  # noqa
    confirmed_cases = pd.read_csv(confirmed_cases_url, sep=",")
    gg = confirmed_cases[confirmed_cases["Country/Region"] == "India"]
    gg = gg.T
    gg["cases"] = gg[147]
    df = gg.drop(147, axis=1).iloc[4:]
    df = df.diff().iloc[1:]
    rmse = []
    mae = []
    mapes = []
    arr = df["cases"].to_numpy(dtype=np.float64)
    df["one_old"] = df["cases"].shift(1)
    df["two_old"] = df["cases"].shift(2)
    df["three_old"] = df["cases"].shift(3)
    df = df[5:]
    x_train = df[["one_old", "two_old", "three_old"]]
    y_train = df["cases"]
    x_test = x_train[275:300]
    y_test = y_train[275:300]
    x_train = x_train[:275]
    y_train = y_train[:275]
    xtrain = x_train.to_numpy(dtype=np.float64)
    ytrain = y_train.to_numpy(dtype=np.float64)
    xtest = x_test.to_numpy(dtype=np.float64)
    ytest = y_test.to_numpy(dtype=np.float64)

    poisson_training_results = sm.GLM(ytrain[:], xtrain[:, :]).fit()

    poisson_predictions = poisson_training_results.get_prediction(xtest)
    predictions_summary_frame = poisson_predictions.summary_frame()
    predictions_summary_frame["Actual"] = ytest

    for i in range(14):
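        # roll the forecast forward: previous predictions become the newest lag column of xtest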
        xtest = np.hstack(
            (np.array(predictions_summary_frame["mean"]).reshape(-1, 1),
             xtest[:, :-1]))
        ytest = np.vstack((
            ytest[1:].reshape(-1, 1),
            (np.array(df["cases"][300 + i:300 + i + 1],
                      dtype="float")).reshape(-1, 1),
        ))
        predictions_summary_frame[
            "mean"] = poisson_training_results.get_prediction(
                xtest).summary_frame()["mean"]
        rmse.append(
            mean_squared_error((predictions_summary_frame["mean"]), (ytest),
                               squared=False))
        mae.append(
            mean_absolute_error((predictions_summary_frame["mean"]), (ytest)))
        mapes.append(
            mean_absolute_percentage_error(predictions_summary_frame["mean"],
                                           ytest))

    rmse = np.array(rmse)
    mae = np.array(mae)
    mapes = np.array(mapes)
    print(f"RMSE: {rmse.mean()}, MAE: {mae.mean()}, MAPE: {mapes.mean()}")
Example #27
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1
    y_pred_2 = y_true - 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_squared_log_error(y_true, y_pred),
        mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred)),
    )
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.0)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred, alpha=0.4), 0.6)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2, alpha=0.4), 0.4)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.0)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    assert np.isfinite(mape)
    assert mape > 1e6
    assert_almost_equal(max_error(y_true, y_pred), 1.0)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=0),
        mean_squared_error(y_true, y_pred),
    )
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=0),
                        r2_score(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p>=2 needs positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=-1),
        5 / 12 * n * (n**2 + 2 * n + 1),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum(),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))

    dev_mean = 2 * np.mean(xlogy(y_true, 2 * y_true / (n + 1)))
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=1),
        1 - (n + 1) * (1 - np.log(2)) / dev_mean,
    )

    dev_mean = 2 * np.log((n + 1) / 2) - 2 / n * np.log(factorial(n))
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=2),
                        1 - (2 * np.log(2) - 1) / dev_mean)
Example #28
def metric_by_category(df):
    r2 = r2_score(df[constants.label_header], df['pred'])
    mape = mean_absolute_percentage_error(df[constants.label_header],
                                          df['pred'])
    mae = mean_absolute_error(df[constants.label_header], df['pred'])
    rmse = np.sqrt(
        mean_squared_error(df[constants.label_header], df['pred']))
    return pd.Series(dict(r2=r2, mape=mape, mae=mae, rmse=rmse))
Example #29
def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    mape = mean_absolute_percentage_error(y_true,
                                          y_pred,
                                          multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(mape, [0.0778, 0.2262], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]] * 4
    y_pred = [[1, 1]] * 4
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(mse, [1., 1.], decimal=2)
    assert_array_almost_equal(mae, [1., 1.], decimal=2)
    assert_array_almost_equal(r, [0., 0.], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values')
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                  multioutput='uniform_average')
    evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                   multioutput='raw_values')
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator is
    # zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(r2, [1., -3.], decimal=2)
    assert np.mean(r2) == r2_score(y_true,
                                   y_pred,
                                   multioutput='uniform_average')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(evs, [1., -3.], decimal=2)
    assert np.mean(evs) == explained_variance_score(y_true, y_pred)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    msle2 = mean_squared_error(np.log(1 + y_true),
                               np.log(1 + y_pred),
                               multioutput='raw_values')
    assert_array_almost_equal(msle, msle2, decimal=2)
Example #30
    def _mape(self):
        """
        Function to calculate mean-absolute-percentage-error
        """
        mape = mean_absolute_percentage_error(y_true=self.y_true,
                                              y_pred=self.y_pred,
                                              multioutput=self.multioutput)

        return mape