def test_get_basic_pipeline_custom():
    """Tests get_basic_pipeline with custom estimator"""
    pipeline = get_basic_pipeline(
        estimator=SilverkiteEstimator(),
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
        score_func_greater_is_better=False,
        agg_periods=10,
        agg_func=np.sum,
        relative_error_tolerance=None,
        coverage=None,
        null_model_params={"strategy": "mean"})
    expected_score_func, _, _ = get_score_func_with_aggregation(
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.get_metric_func(),
        agg_periods=10,
        agg_func=np.sum,
        greater_is_better=False)
    # checks estimator parameters
    estimator = pipeline.steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.fit_algorithm_dict is None
    assert estimator.extra_pred_cols is None
    assert estimator.coverage is None
    assert estimator.null_model_params["strategy"] == "mean"
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        assert_eval_function_equal(estimator.score_func, expected_score_func)

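# `assert_eval_function_equal` comes from the library's testing utilities and
# is used throughout this module. Since wrappers such as
# `get_score_func_with_aggregation` return new callables, equality cannot be
# checked by identity; the helper compares the functions by their output
# instead. A minimal sketch of that idea (the name, body, and sample data
# below are illustrative assumptions, not the library's implementation):
def _assert_eval_function_equal_sketch(f1, f2):
    """Asserts that `f1` and `f2` return the same score on sample data."""
    y_true = pd.Series([1.0, 2.0, 3.0, 4.0])  # hypothetical sample inputs
    y_pred = pd.Series([1.5, 1.5, 3.5, 3.5])
    assert f1(y_true, y_pred) == f2(y_true, y_pred)
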
def test_get_pipeline(df):
    mt = MyTemplate()
    # Initializes attributes needed by the function
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    metric = EvaluationMetricEnum.MeanSquaredError
    mt.score_func = metric.name
    mt.score_func_greater_is_better = metric.get_metric_greater_is_better()
    mt.config = ForecastConfig(
        coverage=0.9,
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=metric.name))
    # Checks get_pipeline output
    pipeline = mt.get_pipeline()
    assert isinstance(pipeline, sklearn.pipeline.Pipeline)
    estimator = pipeline.steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    # the pipeline holds a clone; the template's estimator is not modified
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    expected_col_names = ["regressor1", "regressor2", "regressor_categ", "regressor_bool"]
    select_reg = pipeline.named_steps["input"].transformer_list[2][1].named_steps["select_reg"]
    assert select_reg.column_names == expected_col_names
    assert_eval_function_equal(estimator.score_func, metric.get_metric_func())

def assert_basic_pipeline_equal(actual: Pipeline, expected: Pipeline):
    """Asserts that the two pipelines are equal.

    The pipelines should be created by `get_basic_pipeline`.
    """
    # checks features
    actual_params = actual.get_params()
    expected_params = expected.get_params()
    check_keys = [
        'input__date__select_date__column_names',
        'input__response__select_val__column_names',
        'input__response__outlier__use_fit_baseline',
        'input__response__outlier__z_cutoff',
        'input__response__null__impute_algorithm',
        'input__response__null__impute_all',
        'input__response__null__impute_params',
        'input__response__null__max_frac',
        'input__regressors_numeric__select_reg__column_names',
        'input__regressors_numeric__select_reg_numeric__exclude',
        'input__regressors_numeric__select_reg_numeric__include',
        'input__regressors_numeric__outlier__use_fit_baseline',
        'input__regressors_numeric__outlier__z_cutoff',
        'input__regressors_numeric__normalize__normalize_algorithm',
        'input__regressors_numeric__normalize__normalize_params',
        'input__regressors_numeric__null__impute_algorithm',
        'input__regressors_numeric__null__impute_all',
        'input__regressors_numeric__null__impute_params',
        'input__regressors_numeric__null__max_frac',
        'input__regressors_other__select_reg__column_names',
        'input__regressors_other__select_reg_non_numeric__exclude',
        'input__regressors_other__select_reg_non_numeric__include',
        'degenerate__drop_degenerate',
    ]
    for key in check_keys:
        assert actual_params[key] == expected_params[key], \
            f"{key} is different, found {actual_params[key]}, expected {expected_params[key]}"
    # checks estimator
    actual_estimator = actual.steps[-1][-1]
    expected_estimator = expected.steps[-1][-1]
    assert isinstance(actual_estimator, type(expected_estimator))
    actual_estimator_dict = actual_estimator.__dict__.copy()
    expected_estimator_dict = expected_estimator.__dict__.copy()
    del actual_estimator_dict["null_model"]
    del expected_estimator_dict["null_model"]
    actual_estimator_dict.pop("silverkite", None)
    expected_estimator_dict.pop("silverkite", None)
    actual_estimator_dict.pop("silverkite_diagnostics", None)
    expected_estimator_dict.pop("silverkite_diagnostics", None)
    actual_score_func = actual_estimator_dict.pop("score_func")
    expected_score_func = expected_estimator_dict.pop("score_func")
    assert_equal(actual_estimator_dict, expected_estimator_dict)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        assert_eval_function_equal(actual_score_func, expected_score_func)

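# Why `assert_basic_pipeline_equal` pops `score_func` and compares it via
# `assert_eval_function_equal` instead of leaving it in the `__dict__`
# comparison: functionally identical wrappers are distinct objects without
# value equality. For example (illustrative, using names already imported
# in this module):
#
#     f1 = partial(fraction_outside_tolerance, rtol=0.05)
#     f2 = partial(fraction_outside_tolerance, rtol=0.05)
#     assert f1 != f2  # `functools.partial` does not define value equality
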
def assert_scoring(
        scoring,
        expected_keys=None,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=None):
    """Checks if `scoring` has the expected keys and score functions
    defined by the other parameters.

    Parameters
    ----------
    scoring : `dict`
        ``scoring`` dictionary to check.
    expected_keys : `set` [`str`] or None
        Expected keys in the ``scoring`` dictionary.
        If None, does not check the keys.
    agg_periods : `int` or None
        What was passed to `get_scoring_and_refit`.
    agg_func : callable or None
        What was passed to `get_scoring_and_refit`.
    relative_error_tolerance : `float` or None
        What was passed to `get_scoring_and_refit`.
        Must be provided to check ``FRACTION_OUTSIDE_TOLERANCE_NAME``.
    """
    if expected_keys is not None:
        assert scoring.keys() == expected_keys
    # a few metrics to spot check
    name_func = {
        EvaluationMetricEnum.MeanAbsolutePercentError.get_metric_name():
            EvaluationMetricEnum.MeanAbsolutePercentError.get_metric_func(),
        EvaluationMetricEnum.Quantile95.get_metric_name():
            EvaluationMetricEnum.Quantile95.get_metric_func(),
        FRACTION_OUTSIDE_TOLERANCE_NAME:
            partial(fraction_outside_tolerance, rtol=relative_error_tolerance),
    }
    for name, scorer in scoring.items():
        assert isinstance(scorer, _PredictScorerDF)
        assert scorer._sign == 1  # because greater_is_better=True
        if name in name_func:
            expected_func = get_score_func_with_aggregation(
                score_func=name_func[name],
                agg_periods=agg_periods,
                agg_func=agg_func)[0]
            assert_eval_function_equal(scorer._score_func, expected_func)

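# Background for the `scorer._sign == 1` check above: sklearn-style scorers
# store `greater_is_better` as a sign multiplier applied to the raw score, so
# a sign of 1 means the score is reported as-is and -1 means it is negated
# (making "greater is better" hold for grid search). A minimal sketch of the
# pattern (illustrative, not the actual `_PredictScorerDF` implementation):
class _SignedScorerSketch:
    """Caches a score function and the sign implied by `greater_is_better`."""
    def __init__(self, score_func, greater_is_better=True):
        self._score_func = score_func
        self._sign = 1 if greater_is_better else -1

    def __call__(self, y_true, y_pred):
        # sklearn maximizes scorer output, so losses are negated via the sign
        return self._sign * self._score_func(y_true, y_pred)
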
def test_get_scoring_and_refit():
    """Tests `get_scoring_and_refit`"""
    # Tests default parameters
    enum = EvaluationMetricEnum.MeanAbsolutePercentError
    scoring, refit = get_scoring_and_refit()
    assert_refit(
        refit,
        expected_metric=enum.get_metric_name(),
        expected_greater_is_better=enum.get_metric_greater_is_better())
    expected_keys = {enum.get_metric_name()}
    assert_scoring(scoring=scoring, expected_keys=expected_keys)

    # Tests all parameters, where `score_func_greater_is_better=True`,
    # `score_func` is contained in `cv_report_metrics`,
    # and `cv_report_metrics=CV_REPORT_METRICS_ALL`.
    enum = EvaluationMetricEnum.Correlation
    agg_periods = 7
    agg_func = np.sum
    relative_error_tolerance = 0.025
    scoring, refit = get_scoring_and_refit(
        score_func=enum.name,
        score_func_greater_is_better=enum.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        agg_periods=agg_periods,
        agg_func=agg_func,
        relative_error_tolerance=relative_error_tolerance)
    assert_refit(
        refit,
        expected_metric=enum.get_metric_name(),
        expected_greater_is_better=enum.get_metric_greater_is_better())
    enum_names = set(enum.get_metric_name() for enum in EvaluationMetricEnum)
    assert_scoring(
        scoring=scoring,
        expected_keys=enum_names | {FRACTION_OUTSIDE_TOLERANCE_NAME},
        agg_periods=agg_periods,
        agg_func=agg_func,
        relative_error_tolerance=relative_error_tolerance)

    # Tests where `score_func` is a callable,
    # `cv_report_metrics=CV_REPORT_METRICS_ALL`,
    # and `relative_error_tolerance=None`.
    relative_error_tolerance = None
    scoring, refit = get_scoring_and_refit(
        score_func=mean_absolute_error,
        score_func_greater_is_better=False,
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=relative_error_tolerance)
    assert_refit(
        refit,
        expected_metric=CUSTOM_SCORE_FUNC_NAME,  # custom name for callable
        expected_greater_is_better=False)
    assert_scoring(
        scoring=scoring,
        expected_keys=enum_names | {CUSTOM_SCORE_FUNC_NAME},  # does not include `FRACTION_OUTSIDE_TOLERANCE_NAME`
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=relative_error_tolerance)
    assert_eval_function_equal(
        scoring[CUSTOM_SCORE_FUNC_NAME]._score_func,
        mean_absolute_error)

    # Tests where `score_func=FRACTION_OUTSIDE_TOLERANCE`
    # and `cv_report_metrics` is a list.
    relative_error_tolerance = 0.025
    cv_report_metrics = [
        EvaluationMetricEnum.MeanAbsolutePercentError.name,
        EvaluationMetricEnum.MeanSquaredError.name]
    scoring, refit = get_scoring_and_refit(
        score_func=FRACTION_OUTSIDE_TOLERANCE,
        score_func_greater_is_better=False,
        cv_report_metrics=cv_report_metrics,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=relative_error_tolerance)
    assert_refit(
        refit,
        expected_metric=FRACTION_OUTSIDE_TOLERANCE_NAME,
        expected_greater_is_better=False)
    assert_scoring(
        scoring=scoring,
        expected_keys={
            EvaluationMetricEnum.MeanAbsolutePercentError.get_metric_name(),
            EvaluationMetricEnum.MeanSquaredError.get_metric_name(),
            FRACTION_OUTSIDE_TOLERANCE_NAME},
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=relative_error_tolerance)

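# `assert_refit` (defined alongside these tests) validates the `refit` value
# returned by `get_scoring_and_refit`. With multiple scoring metrics, sklearn
# allows `refit` to be a callable that takes `cv_results_` and returns the
# index of the best configuration. A sketch of how such a callable could
# select by one metric (illustrative assumption, not the library's code):
def _refit_sketch(cv_results, metric_name, greater_is_better=True):
    """Returns the index of the best CV result for `metric_name`."""
    scores = cv_results[f"mean_test_{metric_name}"]  # sklearn's multimetric key format
    return int(np.argmax(scores)) if greater_is_better else int(np.argmin(scores))
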
def test_get_score_func_with_aggregation():
    """Tests get_score_func_with_aggregation function"""
    # tests callable score function
    score_func = mean_absolute_error
    greater_is_better = False
    score_func, greater_is_better, short_name = get_score_func_with_aggregation(
        score_func,
        greater_is_better=greater_is_better,
        agg_periods=None,
        agg_func=None)
    assert_eval_function_equal(score_func, mean_absolute_error)
    assert greater_is_better is False
    assert short_name == CUSTOM_SCORE_FUNC_NAME

    # tests `EvaluationMetricEnum` string lookup
    score_func = "MedianAbsoluteError"
    greater_is_better = True  # should be overridden
    score_func, greater_is_better, short_name = get_score_func_with_aggregation(
        score_func,
        greater_is_better=greater_is_better,
        agg_periods=None,
        agg_func=None)
    assert_eval_function_equal(score_func, median_absolute_error)
    assert greater_is_better is False
    assert short_name == EvaluationMetricEnum.MedianAbsoluteError.get_metric_name()

    # tests `FRACTION_OUTSIDE_TOLERANCE` lookup
    score_func = FRACTION_OUTSIDE_TOLERANCE
    greater_is_better = True  # should be overridden
    score_func, greater_is_better, short_name = get_score_func_with_aggregation(
        score_func,
        greater_is_better=greater_is_better,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=0.02)
    assert_eval_function_equal(score_func, partial(fraction_outside_tolerance, rtol=0.02))
    assert greater_is_better is False
    assert short_name == FRACTION_OUTSIDE_TOLERANCE_NAME

    # tests exceptions
    with pytest.raises(NotImplementedError, match=r"Evaluation metric.*not available"):
        get_score_func_with_aggregation("unknown_estimator")
    with pytest.raises(
            ValueError,
            match="Must specify `relative_error_tolerance` to request "
                  "FRACTION_OUTSIDE_TOLERANCE as a metric."):
        get_score_func_with_aggregation(score_func=FRACTION_OUTSIDE_TOLERANCE)
    with pytest.raises(
            ValueError,
            match="`score_func` must be an `EvaluationMetricEnum` member name, "
                  "FRACTION_OUTSIDE_TOLERANCE, or callable."):
        get_score_func_with_aggregation(score_func=["wrong_type"])

    # tests preaggregation on score function
    with pytest.warns(UserWarning) as record:
        score_func, greater_is_better, short_name = get_score_func_with_aggregation(
            "MeanAbsoluteError",
            greater_is_better=False,
            agg_periods=3,
            agg_func=np.sum)
        assert_eval_function_equal(
            score_func,
            add_preaggregation_to_scorer(mean_absolute_error, agg_periods=3, agg_func=np.sum))
        assert greater_is_better is False
        assert short_name == EvaluationMetricEnum.MeanAbsoluteError.get_metric_name()
        y_true = pd.Series([3, 1, np.nan, 3, np.inf])  # np.nan and np.inf are ignored
        y_pred = pd.Series([1, 4, 100, 2, -2])
        assert score_func(y_true, y_pred) == 0.0  # sum([3, 1, 3]) == 7 vs sum([1, 4, 2]) == 7
        assert "Requested agg_periods=3, but there are only 1. Using all for aggregation" in record[0].message.args[0]
        assert "2 value(s) in y_true were NA or infinite and are omitted in error calc." in record[4].message.args[0]

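# Worked example for the preaggregation assertion above: the NA/Inf positions
# in `y_true` (indices 2 and 4) are dropped, leaving y_true = [3, 1, 3] and
# y_pred = [1, 4, 2]. Since fewer than `agg_periods=3` complete periods
# remain, all values fall into a single aggregation window (hence the
# "Using all for aggregation" warning), and the comparison becomes
# sum([3, 1, 3]) = 7 vs sum([1, 4, 2]) = 7, so MeanAbsoluteError is 0.0.
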
def test_get_basic_pipeline():
    """Tests get_basic_pipeline with default parameters"""
    estimator = ProphetEstimator()
    pipeline = get_basic_pipeline(
        estimator=estimator,
        score_func=EvaluationMetricEnum.MeanSquaredError.name,
        score_func_greater_is_better=False,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=None,
        coverage=0.95,
        null_model_params=None,
        regressor_cols=["regressor1", "regressor2"])
    # checks classes
    pipeline_estimator = pipeline.steps[-1][-1]
    transformer_list = pipeline.named_steps["input"].transformer_list
    assert pipeline_estimator is not estimator  # the pipeline holds a clone
    assert isinstance(pipeline_estimator, ProphetEstimator)
    assert pipeline_estimator.coverage == 0.95  # set to 0.95
    assert pipeline_estimator.score_func is not None  # set on the clone
    assert estimator.score_func == mean_squared_error  # not modified
    assert isinstance(transformer_list[0][1].named_steps["select_date"], ColumnSelector)
    # checks length of each part of the pipeline
    assert len(pipeline) == 3  # features, degenerate, estimator
    assert len(transformer_list) == 4  # date, response, regressors_numeric, regressors_other
    assert len(transformer_list[1][1].named_steps) == 3  # select_val, outlier, null
    # checks date ColumnSelector parameters
    assert transformer_list[0][0] == "date"
    assert transformer_list[0][1].named_steps["select_date"].column_names[0] == "ts"
    # checks value column transformation parameters
    assert transformer_list[1][0] == "response"
    assert transformer_list[1][1].named_steps["outlier"].z_cutoff is None
    assert transformer_list[1][1].named_steps["null"].impute_algorithm == "interpolate"
    # checks regressor column transformation parameters
    assert transformer_list[2][0] == "regressors_numeric"
    assert transformer_list[2][1].named_steps["select_reg"].column_names == ["regressor1", "regressor2"]
    assert transformer_list[2][1].named_steps["outlier"].z_cutoff is None
    assert transformer_list[2][1].named_steps["normalize"].normalize_algorithm is None
    assert transformer_list[2][1].named_steps["null"].impute_algorithm == "interpolate"
    assert transformer_list[3][0] == "regressors_other"
    # checks degenerate parameters
    assert pipeline.named_steps["degenerate"].drop_degenerate is False
    # checks estimator parameters
    assert pipeline_estimator.growth == "linear"
    assert pipeline_estimator.n_changepoints == 25
    assert pipeline_estimator.uncertainty_samples == 1000
    assert pipeline_estimator.coverage == 0.95
    assert pipeline_estimator.null_model_params is None
    assert_eval_function_equal(
        pipeline_estimator.score_func,
        EvaluationMetricEnum.MeanSquaredError.get_metric_func())

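# For orientation, the pipeline layout exercised by the assertions above
# (component names taken from these tests; tree reconstructed for readability):
#
#   Pipeline (3 steps)
#   ├── "input": feature union over
#   │   ├── "date":               select_date
#   │   ├── "response":           select_val -> outlier -> null
#   │   ├── "regressors_numeric": select_reg -> select_reg_numeric -> outlier -> normalize -> null
#   │   └── "regressors_other":   select_reg -> select_reg_non_numeric
#   ├── "degenerate": transformer with drop_degenerate=False by default
#   └── estimator (e.g. ProphetEstimator or SilverkiteEstimator)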