Example no. 1
def features1():
    return [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]
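Each entry above is a (name, transformer, columns) triple in scikit-learn's ColumnTransformer style. A minimal sketch of how such a list might be consumed, assuming gtime's FeatureCreation (import path assumed) and the name__column__Class column-naming convention shown in the tests further down:

import numpy as np
import pandas as pd
from sklearn.compose import make_column_selector

from gtime.compose import FeatureCreation  # assumed import path
from gtime.feature_extraction import MovingAverage, Shift

ts = pd.DataFrame(
    {"x": range(10)}, index=pd.date_range("2020-01-01", periods=10, freq="D")
)

fc = FeatureCreation(features1())  # the (name, transformer, columns) triples above
X = fc.fit_transform(ts)

# Column names are expected to follow "<name>__<column>__<TransformerClass>", e.g.
# "shift_0__x__Shift", "shift_1__x__Shift", "moving_average_3__x__MovingAverage"
print(X.columns.tolist())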
Example no. 2
    def test_multi_columns_time_shift_feature(self):
        shift = Shift(shift=-2)
        df_multi = pd.DataFrame({"x0": [0, 1, 2, 3, 4, 5], "x1": [7, 8, 9, 10, 11, 12]})

        expected_df = pd.DataFrame.from_dict(
            {
                f"x0__{shift_class_name}": [2, 3, 4, 5, np.nan, np.nan],
                f"x1__{shift_class_name}": [9, 10, 11, 12, np.nan, np.nan],
            }
        )

        testing.assert_frame_equal(shift.fit_transform(df_multi), expected_df)
Example no. 3
def horizon_shift(time_series: pd.DataFrame,
                  horizon: Union[int, List[int]] = 5) -> pd.DataFrame:
    """Perform a shift of the original ``time_series`` for each time step between 1 and
    ``horizon``.

    Parameters
    ----------
    time_series : pd.DataFrame, shape (n_samples, n_features), required
        The time series to shift.

    horizon : int or list of int, optional, default: ``5``
        How far into the future to predict. If an ``int``, one shift is performed
        for each time step between 1 and ``horizon``; if a list, only the listed
        shifts are performed.

    Returns
    -------
    y : pd.DataFrame, shape (n_samples, horizon)
        The shifted time series.

    Examples
    --------
    >>> import pandas as pd
    >>> from gtime.model_selection import horizon_shift
    >>> X = pd.DataFrame(range(0, 5), index=pd.date_range("2020-01-01", "2020-01-05"))
    >>> horizon_shift(X, horizon=2)
                y_1  y_2
    2020-01-01  1.0  2.0
    2020-01-02  2.0  3.0
    2020-01-03  3.0  4.0
    2020-01-04  4.0  NaN
    2020-01-05  NaN  NaN
    >>> horizon_shift(X, horizon=[2])
                y_2
    2020-01-01  2.0
    2020-01-02  3.0
    2020-01-03  4.0
    2020-01-04  NaN
    2020-01-05  NaN

    """
    horizon = range(1, horizon + 1) if isinstance(horizon, int) else horizon
    y = pd.DataFrame(index=time_series.index)
    for k in sorted(horizon):
        shift_feature = Shift(-k)
        y[f"y_{k}"] = shift_feature.fit_transform(time_series)

    return y
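A hedged sketch of pairing horizon_shift with a feature transformer to build an (X, y) pair for supervised forecasting. The FeatureCreation import path and the NaN trimming at the end are assumptions for illustration, not gtime's own model_selection helpers:

import numpy as np
import pandas as pd
from sklearn.compose import make_column_selector

from gtime.compose import FeatureCreation  # assumed import path
from gtime.feature_extraction import Shift

ts = pd.DataFrame(
    {"x": range(20)}, index=pd.date_range("2020-01-01", periods=20, freq="D")
)

# Targets: one column per forecast step (y_1, y_2, y_3)
y = horizon_shift(ts, horizon=3)

# Features: two lagged copies of the series
fc = FeatureCreation(
    [
        ("s1", Shift(1), make_column_selector(dtype_include=np.number)),
        ("s2", Shift(2), make_column_selector(dtype_include=np.number)),
    ]
)
X = fc.fit_transform(ts)

# Keep only rows where both features and targets are defined; this trimming is
# illustrative, gtime's model_selection helpers may handle it differently.
mask = X.notna().all(axis=1) & y.notna().all(axis=1)
X_train, y_train = X[mask], y[mask]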
Example no. 4
 def __init__(self, p: int, horizon: int):
     features = [
         (f"s{i}", Shift(i), make_column_selector(dtype_include=np.number))
         for i in range(1, p + 1)
     ]
     model = GAR(LinearRegression())
     super().__init__(features=features, horizon=horizon, model=model)
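This reads as the constructor of an autoregressive model: p lagged copies of the series feed a GAR wrapper around LinearRegression through the parent class. A hedged usage sketch, assuming the class is gtime's AR model with the fit/predict interface exercised in the CVPipeline tests further down:

import numpy as np
import pandas as pd

from gtime.time_series_models import AR  # assumed import path

idx = pd.period_range(start="2011-01-01", end="2011-06-30")
df = pd.DataFrame(np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"])

ar = AR(p=2, horizon=3)  # two lag features, forecast three steps ahead
ar.fit(df)
y_pred = ar.predict()    # one row per forecast origin, one column per step ahead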
Example no. 5
 def __init__(self, horizon: int, seasonal_length: int):
     features = [
         ("s1", Shift(0), make_column_selector()),
     ]
     super().__init__(
         features=features,
         horizon=horizon,
         model=SeasonalNaiveForecaster(seasonal_length),
     )
Example no. 6
 def __init__(
     self,
     p: int,
     horizon: Union[int, List[int]],
     explainer_type: Optional[str] = None,
 ):
     self.p = p
     self.explainer_type = explainer_type
     features = [
         (f"s{i}", Shift(i), make_column_selector(dtype_include=np.number))
         for i in range(p)
     ]
     model = GAR(LinearRegression(), explainer_type=explainer_type)
     super().__init__(features=features, horizon=horizon, model=model)
Example no. 7
def test_feature_creation_transform():
    data = testing.makeTimeDataFrame(freq="s")

    shift = Shift(1)
    ma = MovingAverage(window_size=3)

    col_name = 'A'

    fc = FeatureCreation([
        ('s1', shift, [col_name]),
        ('ma3', ma, [col_name]),
    ])
    res = fc.fit(data).transform(data)

    assert_array_equal(res.columns.values, [
        f's1__{col_name}__{shift.__class__.__name__}',
        f'ma3__{col_name}__{ma.__class__.__name__}'
    ])
Example no. 8
from gtime.utils.hypothesis.feature_matrices import (
    X_y_matrices,
    X_matrices,
    y_matrices,
    numpy_X_y_matrices,
    numpy_X_matrices,
)
from gtime.utils.hypothesis.general_strategies import (
    shape_X_y_matrices,
    ordered_pair,
    shape_matrix,
)

df_transformer = FeatureCreation(
    [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
        (
            "moving_average_3",
            MovingAverage(window_size=3),
            make_column_selector(dtype_include=np.number),
        ),
    ]
)


class TestXyMatrices:
    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_X_shape_correct(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        X, y = X_y
        assert X.shape[1] == len(df_transformer.transformers_)
Example no. 9
def features2():
    return [
        ("shift_0", Shift(0), make_column_selector(dtype_include=np.number)),
        ("shift_1", Shift(1), make_column_selector(dtype_include=np.number)),
    ]
Example no. 10
 def __init__(self, horizon: int):
     features = [
         ("s1", Shift(0), make_column_selector()),
     ]
     super().__init__(features=features, horizon=horizon, model=AverageForecaster())
Example no. 11
    def test_random_ts_and_shifts(self, df: pd.DataFrame, shift: int):
        shift_feature = Shift(shift=shift)

        df_shifted = shift_feature.fit_transform(df)
        correct_df_shifted = self._correct_shift(df, shift)

        # Compare values only: Shift renames the output columns, pandas .shift() does not
        np.testing.assert_array_equal(df_shifted.values, correct_df_shifted.values)
Example no. 12
 def test_shift_transform(self, shift, expected):
     shift = Shift(shift=shift)
     testing.assert_frame_equal(shift.fit_transform(df), expected)
Example no. 13
from hypothesis import given, strategies as st
from hypothesis.extra.pandas import column, data_frames

from gtime.feature_extraction import (
    Shift,
    MovingAverage,
    Exogenous,
    Polynomial,
    CustomFeature,
    MovingCustomFunction,
)
from gtime.utils.hypothesis.time_indexes import giotto_time_series

df = pd.DataFrame.from_dict({"x": [0, 1, 2, 3, 4, 5]})

shift_class_name = Shift().__class__.__name__
df_shift_1 = pd.DataFrame.from_dict({f"x__{shift_class_name}": [np.nan, 0, 1, 2, 3, 4]})
df_shift_m2 = pd.DataFrame.from_dict(
    {f"x__{shift_class_name}": [2, 3, 4, 5, np.nan, np.nan]}
)
df_shift_0 = pd.DataFrame.from_dict({f"x__{shift_class_name}": [0, 1, 2, 3, 4, 5]})


# FIXME: shift a + shift b = shift a+b instead
class TestShift:
    def _correct_shift(self, df: pd.DataFrame, shift: int) -> pd.DataFrame:
        return df.shift(shift)

    @pytest.mark.parametrize(
        ("shift", "expected"), [(1, df_shift_1), (-2, df_shift_m2), (0, df_shift_0)]
    )
Example no. 14
time_series = TimeSeries()

# You can plot
time_series.plot()

# Decomposition
## A bit odd: plot_stl() plus calling stl_decomposition twice
time_series = time_series.stl_decomposition()
time_series.plot_stl()
time_series = time_series.recompose()  # Choose a good name

# Box-Cox
time_series = time_series.box_cox(lambda_=0.3)

# Feature forecasting
features = [("shift", Shift(1), "time_series")]
automatic_features = get_features()  # Similar to fast.ai get_transforms()
gar_forecaster = LinearRegression()
# The TimeSeriesForecastingModel object keeps track of all the intermediate steps.
# You don't need to deal with the train/test split manually, etc.
forecasting_model = TimeSeriesForecastingModel(
    features=features, horizon=3, model=gar_forecaster
)
forecasting_model = forecasting_model.fit(time_series)
forecasting_model.predict()
forecasting_model.cross_validate()  # Does cross-validation also work across multiple time series?

# Residuals analysis
forecasting_model.residuals_.acf()

# Questions
Example no. 15
class TestCVPipeline:
    @given(
        models=models_grid(),
        n_splits=st.integers(min_value=2, max_value=10),
        blocking=st.booleans(),
        metrics=metrics(),
    )
    def test_constructor(self, models, n_splits, blocking, metrics):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 blocking=blocking,
                                 metrics=metrics)
        list_len = np.sum(
            [np.prod([len(y) for y in x.values()]) for x in models.values()])
        assert list_len == len(cv_pipeline.model_list)
        assert len(metrics) == len(cv_pipeline.metrics)

    @pytest.mark.parametrize("models", [{
        Naive: {
            "horizon": [3]
        },
        AR: {
            "horizon": [3],
            "p": [2, 3]
        }
    }])
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [3, 5])
    @pytest.mark.parametrize("blocking", [True, False])
    @pytest.mark.parametrize("seed", [5, 1000])
    def test_fit_predict(self, models, n_splits, blocking, metrics, seed):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 blocking=blocking,
                                 metrics=metrics)
        np.random.seed(seed)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize(
        "models",
        [{
            TimeSeriesForecastingModel: {
                "features": [
                    [("s3", Shift(1), ["1"])],
                    [("ma10", MovingAverage(10), ["1"])],
                ],
                "horizon": [4],
                "model": [NaiveForecaster(),
                          DriftForecaster()],
            }
        }],
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [5])
    def test_model_assembly(self, models, n_splits, metrics):
        cv_pipeline = CVPipeline(models_sets=models,
                                 n_splits=n_splits,
                                 metrics=metrics)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize("models", [{
        Naive: {
            "horizon": [3]
        },
        AR: {
            "horizon": [3],
            "p": [2, 3]
        }
    }])
    @pytest.mark.parametrize("refit",
                             ["all", "best", ["Naive: {'horizon': 3}"]])
    def test_models_refit(self, models, refit):
        cv_pipeline = CVPipeline(models_sets=models)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(np.random.standard_normal((len(idx), 1)),
                          index=idx,
                          columns=["1"])
        cv_pipeline.fit(df, refit=refit)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)
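For reference, a hedged sketch of using CVPipeline outside the test harness, mirroring the tests above; the import paths are assumptions:

import numpy as np
import pandas as pd

from gtime.metrics import rmse, mae              # assumed import path
from gtime.model_selection import CVPipeline     # assumed import path
from gtime.time_series_models import AR, Naive   # assumed import path

# Grid of model classes and constructor arguments, as in the tests above
models = {Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}
metrics = {"RMSE": rmse, "MAE": mae}

cv_pipeline = CVPipeline(models_sets=models, n_splits=5, metrics=metrics)

idx = pd.period_range(start="2011-01-01", end="2012-01-01")
df = pd.DataFrame(np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"])

cv_pipeline.fit(df)
print(cv_pipeline.cv_results_)   # one row per (model, metric) pair
y_pred = cv_pipeline.predict()   # forecasts from the best model found during CV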