def test_fit_remove_add_trend(order, n_instances, n_timepoints):
    """Check that ``fit_trend`` yields one coefficient row per instance.

    Parameters
    ----------
    order : int
        Polynomial order used both to generate the series and to fit them.
    n_instances : int
        Number of series to generate.
    n_timepoints : int
        Passed to the series generator as the first argument.
    """
    # One random column vector of ``order + 1`` coefficients, shared by
    # every generated series.
    true_coefs = np.random.normal(size=order + 1).reshape(-1, 1)

    # Stack the generated series as columns, then transpose.
    # NOTE(review): x is presumably (n_instances, n_timepoints), assuming the
    # generator returns one series of length n_timepoints -- confirm.
    series = [
        _generate_polynomial_series(n_timepoints, order, coefs=true_coefs)
        for _ in range(n_instances)
    ]
    x = np.column_stack(series).T

    # The fit must return ``order + 1`` coefficients for each instance.
    fitted_coefs = fit_trend(x, order=order)
    expected_shape = (n_instances, order + 1)
    assert fitted_coefs.shape == expected_shape
def test_fit_remove_add_trend(order, n_samples, n_obs):
    """Round-trip test for ``fit_trend``, ``remove_trend`` and ``add_trend``.

    Generates polynomial series, then checks that (1) the fitted coefficient
    array has one row of ``order + 1`` values per sample, (2) removing the
    fitted trend leaves (almost) all zeros, and (3) adding the trend back
    reproduces the original data.

    Parameters
    ----------
    order : int
        Polynomial order used both to generate the series and to fit them.
    n_samples : int
        Number of series to generate.
    n_obs : int
        Passed to the series generator as the first argument.
    """
    # One random column vector of ``order + 1`` coefficients, shared by
    # every generated series.
    true_coefs = np.random.normal(size=order + 1).reshape(-1, 1)

    # Stack the generated series as columns, then transpose.
    # NOTE(review): x is presumably (n_samples, n_obs), assuming the
    # generator returns one series of length n_obs -- confirm.
    series = [
        generate_polynomial_series(n_obs, order, coefs=true_coefs)
        for _ in range(n_samples)
    ]
    x = np.column_stack(series).T

    # Shape of the fitted coefficients: one row per sample.
    fitted_coefs = fit_trend(x, order=order)
    expected_shape = (n_samples, order + 1)
    assert fitted_coefs.shape == expected_shape

    # The trend should be removed completely when the true order is given.
    detrended = remove_trend(x, fitted_coefs)
    all_zeros = np.zeros(x.shape)
    np.testing.assert_array_almost_equal(detrended, all_zeros)

    # The inverse transform should restore the original series.
    restored = add_trend(detrended, coefs=fitted_coefs)
    np.testing.assert_array_almost_equal(x, restored)
def transform(self, X, y=None):
    """Fit a polynomial trend to the series in X and remove it.

    The trend coefficients are (re-)fitted on every call and stored on the
    instance together with the input shape and time index.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_samples, n_features]
        Nested dataframe with time-series in cells. Only a single column
        is supported.
    y : object, optional (default=None)
        Not used by this method.

    Returns
    -------
    Xt : pandas DataFrame
        Nested pandas DataFrame holding the detrended series, with the same
        column labels as X.

    Raises
    ------
    ValueError
        If ``self.check_input`` is true and X is not a pandas DataFrame.
    NotImplementedError
        If ``self.check_input`` is true and X has more than one column.
    """
    if self.check_input:
        if not isinstance(X, pd.DataFrame):
            raise ValueError(f"Input must be pandas DataFrame, but found: {type(X)}")
        if X.shape[1] > 1:
            # Plain string: the message has no placeholders to interpolate.
            raise NotImplementedError("Currently does not work on multiple columns")

    self._input_shape = X.shape

    # keep time index as trend depends on it,
    # e.g. to carry forward trend in inverse_transform
    self._time_index = get_time_index(X)

    # convert the (single) first column into tabular format
    tabulariser = Tabulariser()
    Xs = tabulariser.transform(X.iloc[:, :1])

    # fit polynomial trend
    self.coefs_ = fit_trend(Xs, order=self.order)

    # remove trend
    Xt = remove_trend(Xs, coefs=self.coefs_, time_index=self._time_index)

    # convert back into nested format and restore the original column labels
    Xt = tabulariser.inverse_transform(pd.DataFrame(Xt))
    Xt.columns = X.columns
    return Xt
def _compute_trend(y):
    """Return half of the leading coefficient of a first-order trend fit.

    The values of ``y`` are reshaped into a single-row 2-D array and passed
    to ``fit_trend`` with ``order=1`` (least squares regression).

    Parameters
    ----------
    y : object exposing ``.values`` (e.g. a pandas Series)
        The series whose trend is estimated.

    Returns
    -------
    Half of the ``[0, 0]`` entry of the fitted coefficient array.
    NOTE(review): that entry is presumably the slope of the linear fit --
    confirm against ``fit_trend``'s coefficient ordering.
    """
    # Single row: one series, all of its observations along the second axis.
    single_row = y.values.reshape(1, -1)
    fitted = fit_trend(single_row, order=1)
    leading_coef = fitted[0, 0]
    return leading_coef / 2