Python load_airlineの例、sktime.datasets.load_airline Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_online_learning.py プロジェクト: fspinna/sktime_forked

def test_weights_for_airline_averaging():
    """Check that the fitted ensemble weights are uniform.

    An ``OnlineEnsembleForecaster`` built from three exponential smoothing
    variants is fitted on the training part of the airline data; its
    ``weights`` attribute must then equal ``[1/3, 1/3, 1/3]``.
    """
    train, _ = temporal_train_test_split(load_airline())

    # seasonal settings shared by all three ensemble members
    seasonal = {"seasonal": "multiplicative", "sp": 12}
    members = [
        ("ses", ExponentialSmoothing(**seasonal)),
        (
            "holt",
            ExponentialSmoothing(trend="add", damped_trend=False, **seasonal),
        ),
        (
            "damped_trend",
            ExponentialSmoothing(trend="add", damped_trend=True, **seasonal),
        ),
    ]

    ensemble = OnlineEnsembleForecaster(members)
    ensemble.fit(train)

    uniform = np.full(3, 1 / 3)
    np.testing.assert_allclose(ensemble.weights, uniform, rtol=1e-8)

コード例 #2

0

ファイルを表示

def load_dataset():
    """Load the airline data together with 12-step rolling statistics.

    Returns
    -------
    tuple
        ``(y, df, rolling_mean, rolling_std)`` where ``y`` is the object
        returned by ``load_airline()``, ``df`` is a DataFrame built from it
        with its index converted through ``to_timestamp()``, and the last two
        items are the rolling mean and standard deviation of ``df`` computed
        with ``window=12``.
    """
    series = load_airline()
    frame = pd.DataFrame(series)
    frame.index = frame.index.to_timestamp()

    # one rolling view serves both statistics
    yearly = frame.rolling(window=12)
    return series, frame, yearly.mean(), yearly.std()

コード例 #3

0

ファイルを表示

def test_pred_errors_against_y_test(fh):
    """Check prediction intervals on the airline dataset.

    The test observations must lie in the prediction interval requested with
    coverage=0.1.

    Arguments
    ---------
    fh: ForecastingHorizon, fh at which to test prediction

    Raises
    ------
    AssertionError - if the test values do not lie within the prediction intervals
    """
    train, test = temporal_train_test_split(load_airline())

    forecaster = ThetaForecaster()
    forecaster.fit(train, fh=fh)
    pred_int = forecaster.predict_interval(fh=fh, coverage=[0.1])

    # select the test observations addressed by the horizon (shifted by one
    # to obtain positional indices)
    observed = test.iloc[check_fh(fh) - 1]

    # Performance should be good enough that all observations lie within the
    # prediction intervals.
    for column in pred_int:
        bound = pred_int[column].values
        # NOTE(review): column[1] < 0.5 presumably identifies a lower bound
        # (quantile-style column labels) - confirm against the sktime version
        if column[1] < 0.5:
            assert np.all(observed > bound)
        else:
            assert np.all(observed <= bound)

コード例 #4

0

ファイルを表示

def test_boxcox_transform():
    """Compare an adapted box-cox ``PowerTransformer`` against ``boxcox``."""
    y = load_airline()

    power = PowerTransformer(method="box-cox", standardize=False)
    adaptor = TabularToSeriesAdaptor(power)
    actual = adaptor.fit_transform(y)

    # boxcox returns the fitted lambda as second output; only the transformed
    # values are compared here
    expected = boxcox(np.asarray(y))[0]
    np.testing.assert_array_equal(actual, expected)

コード例 #5

0

ファイルを表示

ファイル: test_theta.py プロジェクト: fspinna/sktime_forked

def test_theta_1():
    """With theta = 1 the theta-line must equal the original time series."""
    series = load_airline()

    transformer = ThetaLinesTransformer(1)
    transformer.fit(series)

    np.testing.assert_array_equal(transformer.transform(series), series)

コード例 #6

0

ファイルを表示

ファイル: test_tune.py プロジェクト: xcon2/sktime

def test_gscv_fit(forecaster, param_dict, cv, scoring):
    """Check scores and selected parameters of ``ForecastingGridSearchCV``.

    The mean test scores must equal those from
    ``compute_expected_gscv_scores``, the best parameters must be the grid
    entry with the smallest score, and the best forecaster must carry exactly
    those parameters.
    """
    grid = ParameterGrid(param_dict)
    y = load_airline()

    search = ForecastingGridSearchCV(
        forecaster, param_grid=param_dict, cv=cv, scoring=scoring
    )
    search.fit(y)

    # scores reported by the search vs. independently computed scores
    score_key = f"mean_test_{scoring.name}"
    actual_scores = search.cv_results_[score_key]
    reference_scores = compute_expected_gscv_scores(
        forecaster, cv, grid, y, scoring
    )
    np.testing.assert_array_equal(actual_scores, reference_scores)

    # best parameters correspond to the minimal score
    assert search.best_params_ == grid[actual_scores.argmin()]

    # the refitted best forecaster was configured with the best parameters
    fitted_params = search.best_forecaster_.get_params()
    tuned_subset = {
        name: setting
        for name, setting in fitted_params.items()
        if name in search.best_params_
    }
    assert tuned_subset == search.best_params_

コード例 #7

0

ファイルを表示

def test_pipeline():
    """Test results of TransformedTargetForecaster.

    The pipeline prediction is compared with the result of applying the same
    transformers and forecaster manually, step by step.
    """
    train, test = temporal_train_test_split(load_airline())
    fh = np.arange(1, len(test) + 1)

    pipeline = TransformedTargetForecaster([
        ("t1", ExponentTransformer()),
        ("t2", TabularToSeriesAdaptor(MinMaxScaler())),
        ("forecaster", NaiveForecaster()),
    ])
    pipeline.fit(train, fh=fh)
    actual = pipeline.predict()

    # manual fitting: transform the target with t1, then t2, then forecast
    exponent = ExponentTransformer()
    transformed = exponent.fit_transform(train.copy())
    scaler = TabularToSeriesAdaptor(MinMaxScaler())
    transformed = scaler.fit_transform(transformed)
    naive = NaiveForecaster()
    naive.fit(transformed, fh=fh)

    # manual predicting: undo the transformations in reverse order
    expected = naive.predict()
    expected = scaler.inverse_transform(expected)
    expected = exponent.inverse_transform(expected)

    np.testing.assert_array_equal(actual, expected)

コード例 #8

0

ファイルを表示

ファイル: test_pipeline.py プロジェクト: fspinna/sktime_forked

def test_pipeline():
    """Compare a deseasonalize/detrend pipeline with its manual equivalent.

    ``TransformedTargetForecaster`` predictions must equal the result of
    applying the same transformers and forecaster by hand.
    """
    train, test = temporal_train_test_split(load_airline())
    fh = np.arange(1, len(test) + 1)

    pipeline = TransformedTargetForecaster([
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ])
    pipeline.fit(train, fh=fh)
    actual = pipeline.predict()

    # manual fitting: deseasonalize, detrend, then forecast
    deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
    transformed = deseasonalizer.fit_transform(train.copy())
    detrender = Detrender(PolynomialTrendForecaster(degree=1))
    transformed = detrender.fit_transform(transformed)
    naive = NaiveForecaster()
    naive.fit(transformed, fh=fh)

    # manual predicting: undo the transformations in reverse order
    expected = naive.predict()
    expected = detrender.inverse_transform(expected)
    expected = deseasonalizer.inverse_transform(expected)

    np.testing.assert_array_equal(actual, expected)

コード例 #9

0

ファイルを表示

ファイル: test_trend.py プロジェクト: zhang-yuanye/sktime

def check_trend(degree, with_intercept):
    """Helper function to check the fitted trend coefficients.

    Fits a ``PolynomialTrendForecaster`` on the airline data and compares the
    coefficients of the last step of its fitted regressor with the output of
    ``compute_expected_coefs``.
    """
    y = load_airline()

    forecaster = PolynomialTrendForecaster(
        degree=degree, with_intercept=with_intercept
    )
    forecaster.fit(y)

    # the last step is a (name, estimator) pair; the coefficients are
    # reversed because the intercept is added in reverse order
    _, final_estimator = forecaster.regressor_.steps[-1]
    fitted_coefs = final_estimator.coef_[::-1]

    expected_coefs = compute_expected_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(fitted_coefs, expected_coefs)

コード例 #10

0

ファイルを表示

ファイル: test_trend.py プロジェクト: zhang-yuanye/sktime

def test_linear_detrending():
    """Check ``Detrender`` with a degree-1 trend against the expected result."""
    y = load_airline()

    trend = PolynomialTrendForecaster(degree=1, with_intercept=True)
    detrended = Detrender(trend).fit_transform(y)

    reference = compute_expected_detrend(y, 1, with_intercept=True)
    np.testing.assert_allclose(detrended, reference)

コード例 #11

0

ファイルを表示

def test_boxcox_against_scipy():
    """Check ``BoxCoxTransformer`` output and lambda against ``boxcox``."""
    y = load_airline()

    transformer = BoxCoxTransformer()
    actual = transformer.fit_transform(y)

    # boxcox yields both the transformed values and the fitted lambda
    expected, expected_lambda = boxcox(y.values)

    np.testing.assert_array_equal(actual, expected)
    assert transformer.lambda_ == expected_lambda

コード例 #12

0

ファイルを表示

def test_forecaster_with_initial_level():
    """Check ``ThetaForecaster`` with ``initial_level=0.1`` on airline data.

    The series is transformed with ``np.log1p``; forecasts must match the
    test values within a relative tolerance of 0.05.
    """
    log_series = np.log1p(load_airline())
    train, test = temporal_train_test_split(log_series)
    horizon = np.arange(1, len(test) + 1)

    forecaster = ThetaForecaster(initial_level=0.1, sp=12)
    forecaster.fit(train)

    np.testing.assert_allclose(forecaster.predict(fh=horizon), test, rtol=0.05)

コード例 #13

0

ファイルを表示

ファイル: test_theta.py プロジェクト: zerefwayne/sktime

def test_predictive_performance_on_airline():
    """Check ``ThetaForecaster(sp=12)`` accuracy on log1p-transformed data."""
    log_series = np.log1p(load_airline())
    train, test = temporal_train_test_split(log_series)
    horizon = np.arange(1, len(test) + 1)

    forecaster = ThetaForecaster(sp=12)
    forecaster.fit(train)
    forecast = forecaster.predict(fh=horizon)

    # Performance on this particular dataset should be reasonably good:
    # forecasts must match the test values within 5% relative tolerance.
    np.testing.assert_allclose(forecast, test, rtol=0.05)

コード例 #14

0

ファイルを表示

ファイル: test_theta.py プロジェクト: fspinna/sktime_forked

def test_theta_0():
    """With theta = 0 the theta-line must match a linear regression fit.

    The expected values are the fitted line of ``linregress`` over the
    positions 1..len(y).
    """
    y = load_airline()

    transformer = ThetaLinesTransformer(0)
    transformer.fit(y)
    actual = transformer.transform(y)

    positions = np.arange(1, y.size + 1)
    line = linregress(positions, y)
    expected = line.intercept + line.slope * positions

    np.testing.assert_almost_equal(actual, expected, decimal=8)

コード例 #15

0

ファイルを表示

def test_evaluate():
    """Smoke test: ``evaluate`` must run and return a ``pd.DataFrame``."""
    y = load_airline()
    drift_forecaster = NaiveForecaster(strategy="drift", sp=12)

    splitter = ExpandingWindowSplitter(
        initial_window=24,
        step_length=24,
        fh=list(range(1, 13)),  # horizons 1..12
        window_length=10,
    )

    results = evaluate(
        forecaster=drift_forecaster, y=y, cv=splitter, strategy="update"
    )

    # just making sure the function is running
    assert isinstance(results, pd.DataFrame)

コード例 #16

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: preeti13456/sktime

def test_factory_method_direct():
    """Check ``ReducedForecaster`` against ``DirectRegressionForecaster``.

    Both are built around the same ``LinearRegression`` and must produce
    identical predictions for the last 24 observations of the airline data.
    """
    train, test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(test.index, is_relative=False)

    regressor = LinearRegression()
    via_factory = ReducedForecaster(
        regressor, scitype="regressor", strategy="direct"
    )
    via_class = DirectRegressionForecaster(regressor)

    actual = via_factory.fit(train, fh=fh).predict(fh)
    expected = via_class.fit(train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)

コード例 #17

0

ファイルを表示

ファイル: test_theta.py プロジェクト: whynjones/sktime

def test_pred_errors_against_y_test(fh):
    """Check that test values lie strictly within forecast +/- error.

    The errors come from ``ThetaForecaster._compute_pred_errors(alpha=0.1)``;
    ``fh`` is the forecasting horizon under test.
    """
    train, test = temporal_train_test_split(load_airline())

    forecaster = ThetaForecaster()
    forecaster.fit(train, fh)
    forecast = forecaster.predict(return_pred_int=False)

    errors = forecaster._compute_pred_errors(alpha=0.1)
    # a single Series is wrapped so that the loop below can iterate over it
    error_list = [errors] if isinstance(errors, pd.Series) else errors

    observed = test.iloc[check_fh(fh) - 1]
    for margin in error_list:
        assert np.all(observed > forecast - margin)
        assert np.all(observed < forecast + margin)

コード例 #18

0

ファイルを表示

def test_reductions_airline_data(forecaster, expected):
    """Test reduction forecasters on the airline dataset.

    The given forecaster is fitted on all but the last 24 observations and its
    predictions for those 24 steps are compared with ``expected``. The expected
    values were calculated with linear estimators by Lovkush Agarwal on their
    local machine in Mar 2021.
    """
    train, test = temporal_train_test_split(load_airline(), test_size=24)
    horizon = ForecastingHorizon(test.index, is_relative=False)

    fitted = forecaster.fit(train, fh=horizon)
    predictions = fitted.predict(horizon)

    np.testing.assert_almost_equal(predictions, expected)

コード例 #19

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: preeti13456/sktime

def test_factory_method_ts_direct():
    """Check ``ReducedForecaster`` against the direct time series forecaster.

    Both wrap the same tabularize + ``LinearRegression`` pipeline and must
    produce identical predictions for the last 24 airline observations.
    """
    train, test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(test.index, is_relative=False)

    steps = [("tabularize", Tabularizer()), ("model", LinearRegression())]
    ts_regressor = Pipeline(steps)

    via_factory = ReducedForecaster(
        ts_regressor, scitype="ts_regressor", strategy="direct"
    )
    via_class = DirectTimeSeriesRegressionForecaster(ts_regressor)

    actual = via_factory.fit(train, fh=fh).predict(fh)
    expected = via_class.fit(train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)

コード例 #20

0

ファイルを表示

def test_guerrero_against_r_implementation(bounds, r_lambda):
    """Compare the guerrero lambda of BoxCoxTransformer with the R value.

    The reference lambdas were estimated with the R implementation of the
    Guerrero method:
    https://github.com/robjhyndman/forecast/blob/master/R/guerrero.R

    R code to generate the hardcoded value for bounds=(-1, 2) used in the test
    ('Airline.csv' contains the data from 'load_airline()'):
        airline_file <- read.csv(file = 'Airline.csv')[,c('Passengers')]
        airline.ts <- ts(airline_file)
        guerrero(airline.ts, lower=-1, upper=2, nonseasonal.length = 20)
    Output:
        -0.156981228426408

    The fitted ``lambda_`` must agree with ``r_lambda`` to 4 decimals.
    """
    transformer = BoxCoxTransformer(bounds=bounds, method="guerrero", sp=20)
    transformer.fit(load_airline())

    np.testing.assert_almost_equal(transformer.lambda_, r_lambda, decimal=4)

コード例 #21

0

ファイルを表示

ファイル: test_pipeline.py プロジェクト: juanitorduz/sktime

def test_skip_inverse_transform():
    """Test transformers with skip-inverse-transform tag in pipeline.

    A missing value and an outlier are injected into the airline data before
    a HampelFilter -> Imputer -> NaiveForecaster pipeline is fitted; the
    prediction must be a ``pd.Series``.
    """
    y = load_airline()
    # inject a missing value and an outlier (20x the original value)
    y.iloc[3] = np.nan
    y.iloc[4] *= 20

    train, test = temporal_train_test_split(y)
    horizon = np.arange(1, len(test) + 1)

    steps = [
        ("t1", HampelFilter(window_length=12)),
        ("t2", Imputer(method="mean")),
        ("forecaster", NaiveForecaster()),
    ]
    pipeline = TransformedTargetForecaster(steps)
    pipeline.fit(train, fh=horizon)

    assert isinstance(pipeline.predict(), pd.Series)

コード例 #22

0

ファイルを表示

def test_results_consistency(levels=levels):
    """Check consistency between wrapper and statsmodels original implementation.

    For horizons of length 3, 5 and 10 and for every entry of ``levels``, the
    forecast of ``UnobservedComponents`` must equal the forecast of
    ``_UnobservedComponents`` fitted on the same data, and must contain one
    value per horizon step.
    """
    y = load_airline()

    for n_steps in (3, 5, 10):
        fh = np.arange(1, n_steps + 1)
        for level in levels:
            # wrapper: fit and predict
            wrapper = UnobservedComponents(level=level)
            wrapper.fit(y)
            wrapper_forecast = wrapper.predict(fh=fh)

            # original model: fit and forecast the same number of steps
            original = _UnobservedComponents(level=level, endog=y)
            original_forecast = original.fit(disp=0).forecast(steps=n_steps)

            assert_series_equal(left=wrapper_forecast, right=original_forecast)
            assert len(fh) == wrapper_forecast.shape[0]

コード例 #23

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: preeti13456/sktime

def test_multioutput_direct_tabular():
    """Multioutput and direct strategies must give the same predictions.

    Both forecasters wrap the same ``LinearRegression``; their predictions
    for the last 24 airline observations must agree to 5 decimals.
    """
    train, test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(test.index, is_relative=False)

    regressor = LinearRegression()
    multioutput = MultioutputRegressionForecaster(regressor)
    direct = DirectRegressionForecaster(regressor)

    multioutput_preds = multioutput.fit(train, fh=fh).predict(fh)
    direct_preds = direct.fit(train, fh=fh).predict(fh)

    # assert_almost_equal does not seem to work with pd.Series objects,
    # hence the comparison is done on plain numpy arrays
    np.testing.assert_almost_equal(
        multioutput_preds.to_numpy(), direct_preds.to_numpy(), decimal=5
    )

コード例 #24

0

ファイルを表示

ファイル: test_theta.py プロジェクト: zerefwayne/sktime

def test_pred_errors_against_y_test(fh):
    """Check that test values lie strictly inside the prediction intervals.

    The intervals come from ``ThetaForecaster.compute_pred_int(y_pred, [0.1])``;
    ``fh`` is the forecasting horizon under test.
    """
    train, test = temporal_train_test_split(load_airline())

    forecaster = ThetaForecaster()
    forecaster.fit(train, fh)
    forecast = forecaster.predict(return_pred_int=False)
    pred_ints = forecaster.compute_pred_int(forecast, [0.1])

    observed = test.iloc[check_fh(fh) - 1]

    # Performance should be good enough that all observations lie within the
    # prediction intervals.
    for interval in pred_ints:
        lower, upper = interval["lower"], interval["upper"]
        assert np.all(observed > lower)
        assert np.all(observed < upper)

コード例 #25

0

ファイルを表示

def test_forecaster_with_initial_level():
    """Check prediction performance on airline dataset.

    A ``ThetaForecaster`` with ``initial_level=0.1`` is fitted on the
    log1p-transformed series. Performance on this dataset should be
    reasonably good.

    Raises
    ------
    AssertionError - if point forecasts do not lie close to the test data
    """
    train, test = temporal_train_test_split(np.log1p(load_airline()))
    horizon = np.arange(1, len(test) + 1)

    theta = ThetaForecaster(initial_level=0.1, sp=12)
    theta.fit(train)
    forecast = theta.predict(fh=horizon)

    np.testing.assert_allclose(forecast, test, rtol=0.05)

コード例 #26

0

ファイルを表示

ファイル: test_regressors.py プロジェクト: ABostrom/sktime-dl

def test_regressor_forecasting(
        regressor=MLPRegressor(nb_epochs=SMALL_NB_EPOCHS), window_length=4
):
    """
    Test a regressor used for forecasting.

    The regressor is wrapped in a RecursiveTimeSeriesRegressionForecaster,
    fitted on the first ``2 * window_length`` training observations of the
    airline data and used to predict the 5 held-out observations. A
    ValueError raised while scoring is tolerated only for MCDCNNRegressor.
    """
    print("Start test_regressor_forecasting()")

    # MCDCNNRegressor gets twice the epochs (note: this changes the attribute
    # on the regressor object that was passed in)
    if isinstance(regressor, MCDCNNRegressor):
        regressor.nb_epochs *= 2

    # load univariate time series data and keep a short training window
    train, test = temporal_train_test_split(load_airline(), test_size=5)
    train = train[:window_length * 2]

    # forecasting horizon: one step per held-out observation
    horizon = np.arange(1, len(test) + 1)

    # solve forecasting task via reduction to time series regression
    reducer = RecursiveTimeSeriesRegressionForecaster(
        estimator=regressor, window_length=window_length
    )
    reducer.fit(train)
    forecast = reducer.predict(horizon)

    try:
        # square root of the mean squared error
        rmse = np.sqrt(mean_squared_error(test, forecast))
        print("Error:", rmse)
    except ValueError:
        if not isinstance(regressor, MCDCNNRegressor):
            # unexpected error in all other cases
            raise
        print(
            "Warning: MCDCNNRegressor produced NaN predictions. This is a "
            "known problem brought about by insufficient data/learning. "
            "For now, we accept that this particular network produced "
            "predictions at all (even NaNs) as passing for this "
            "particular test. Providing more data/epochs risks slowing "
            "down tests too much.")

    print("End test_regressor_forecasting()")

コード例 #27

0

ファイルを表示

ファイル: test_reduce.py プロジェクト: Nancyyes/sktime

def test_dirrec_against_recursive_accumulated_error():
    """Dirrec reduction must achieve a lower sMAPE than recursive reduction.

    The dirrec strategy is expected to produce a lower error due to less
    cumulative error. Both reductions wrap the same ``LinearRegression`` and
    are evaluated on the last 24 observations of the airline data.
    """
    train, test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(test.index, is_relative=False)

    estimator = LinearRegression()
    shared = {"scitype": "tabular-regressor"}
    recursive = make_reduction(estimator, strategy="recursive", **shared)
    dirrec = make_reduction(estimator, strategy="dirrec", **shared)

    recursive_preds = recursive.fit(train, fh=fh).predict(fh)
    dirrec_preds = dirrec.fit(train, fh=fh).predict(fh)

    dirrec_error = smape_loss(test, dirrec_preds)
    recursive_error = smape_loss(test, recursive_preds)
    assert dirrec_error < recursive_error

コード例 #28

0

ファイルを表示

ファイル: test_online_learning.py プロジェクト: fspinna/sktime_forked

def test_weights_for_airline_nnls():
    """Check the ensemble weights obtained with ``NNLSEnsemble``.

    Three mean-based naive forecasters (windows 5, 10, 20) are combined; after
    fitting on the training data and calling ``update_predict`` with the test
    data, the weights must match the hardcoded values.
    """
    train, test = temporal_train_test_split(load_airline())

    nnls = NNLSEnsemble(n_estimators=3, loss_func=mean_squared_error)
    members = [
        (f"av{window}", NaiveForecaster(strategy="mean", window_length=window))
        for window in (5, 10, 20)
    ]
    ensemble = OnlineEnsembleForecaster(members, ensemble_algorithm=nnls)

    ensemble.fit(train)
    ensemble.update_predict(test)

    reference = np.array([0.04720766, 0, 1.03410876])
    np.testing.assert_allclose(ensemble.weights, reference, atol=1e-8)

コード例 #29

0

ファイルを表示

def test_weights_for_airline_normal_hedge():
    """Test weights obtained with ``NormalHedgeEnsemble``.

    Three mean-based naive forecasters (windows 5, 10, 20) are combined; after
    fitting on the training data and calling ``update_predict`` with the test
    data, the weights must match the hardcoded values.
    """
    train, test = temporal_train_test_split(load_airline())

    normal_hedge = NormalHedgeEnsemble(
        n_estimators=3, loss_func=mean_squared_error
    )
    members = [
        (f"av{window}", NaiveForecaster(strategy="mean", window_length=window))
        for window in (5, 10, 20)
    ]
    ensemble = OnlineEnsembleForecaster(
        members, ensemble_algorithm=normal_hedge
    )

    ensemble.fit(train)
    # NOTE(review): `cv` is not defined in this function; it is presumably a
    # module-level splitter - confirm that it exists in the enclosing module
    ensemble.update_predict(y=test, cv=cv, reset_forecaster=False)

    reference = np.array([0.17077154, 0.48156709, 0.34766137])
    np.testing.assert_allclose(ensemble.weights, reference, atol=1e-8)

コード例 #30

0

ファイルを表示

ファイル: test_tune.py プロジェクト: xcon2/sktime

def test_rscv_fit(forecaster, param_dict, cv, scoring, n_iter, random_state):
    """Test that ForecastingRandomizedSearchCV finds the best parameter set.

    The mean test scores must equal those computed by
    ``compute_expected_gscv_scores`` for the parameter sets drawn by a
    ``ParameterSampler`` with the same ``n_iter`` and ``random_state``; the
    best parameters must be the sampled set with the smallest score, and the
    best forecaster must carry exactly those parameters.
    """
    # samples uniformly from param dict values
    sampler = ParameterSampler(param_dict, n_iter, random_state=random_state)

    y = load_airline()
    search = ForecastingRandomizedSearchCV(
        forecaster,
        param_distributions=param_dict,
        cv=cv,
        scoring=scoring,
        n_iter=n_iter,
        random_state=random_state,
    )
    search.fit(y)

    # check scores; the sampler is converted to a list to ensure a
    # consistent number of scores
    score_key = f"mean_test_{scoring.name}"
    actual_scores = search.cv_results_[score_key]
    reference_scores = compute_expected_gscv_scores(
        forecaster, cv, list(sampler), y, scoring
    )
    np.testing.assert_array_equal(actual_scores, reference_scores)

    # check best parameters (the sampler is deliberately listed a second time)
    assert search.best_params_ == list(sampler)[actual_scores.argmin()]

    # check best forecaster is the one with best parameters
    fitted_params = search.best_forecaster_.get_params()
    tuned_subset = {
        name: setting
        for name, setting in fitted_params.items()
        if name in search.best_params_
    }
    assert tuned_subset == search.best_params_