def transform(self, X, y=None) -> numpy.ndarray:
    """
    Return the fractional differentiation of `X`.

    Column ``i`` of `X` is differentiated with its own order ``self.d_[i]``,
    using the estimator's ``window`` and ``mode``.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_series)
        Time-series to perform fractional differentiation.
        Raises ValueError if `n_samples < self.window_`.
    y : array_like, optional
        Ignored.

    Returns
    -------
    fdiff : ``numpy.ndarray``, shape (n_samples, n_series)
        The fractional differentiation of `X`.
    """
    check_is_fitted(self, ["d_"])
    # Keep the validated array: `check_array` returns the converted input, and
    # the code below relies on `.shape` and 2-D fancy indexing, which plain
    # array-likes (e.g. nested lists) do not provide.
    X = check_array(X)

    # A throw-away transform with an arbitrary order fixes the number of
    # output rows (and the dtype) for the given `window` / `mode`.
    prototype = Fracdiff(0.5, window=self.window, mode=self.mode).fit_transform(X)
    out = numpy.empty_like(prototype[:, :0])
    n_rows = out.shape[0]

    # Collect the per-series results and join them once instead of growing
    # the output with a new concatenation on every iteration.
    columns = [out]
    for i in range(X.shape[1]):
        f = Fracdiff(self.d_[i], window=self.window, mode=self.mode)
        columns.append(f.fit_transform(X[:, [i]])[-n_rows:])

    return numpy.concatenate(columns, 1)
def test_mul(order, seed, n_samples, n_series, a):
    """
    Check that scaling the input scales the output: `D(a * X) = a * D(X)`.
    """
    np.random.seed(seed)
    series = make_X(n_samples, n_series)

    diff_plain = Fracdiff(order).transform(series)
    diff_scaled = Fracdiff(order).transform(a * series)

    # NaNs at matching positions are treated as equal.
    assert np.allclose(a * diff_plain, diff_scaled, equal_nan=True)
def test_add(order, seed, n_samples, n_series):
    """
    Check additivity of the operator: `D(X1 + X2) = D(X1) + D(X2)`.
    """
    np.random.seed(seed)
    first = make_X(n_samples, n_series)
    second = make_X(n_samples, n_series)

    diff_first = Fracdiff(order).transform(first)
    diff_second = Fracdiff(order).transform(second)
    diff_of_sum = Fracdiff(order).transform(first + second)

    # NaNs at matching positions are treated as equal.
    assert np.allclose(diff_first + diff_second, diff_of_sum, equal_nan=True)
def test_change_d(d, window, n_blanks_1, n_blanks_2, n_terms, n_series):
    """
    Check that reassigning `d` on an already-used instance takes effect.

    The output produced after switching the order must match the output of a
    fresh `Fracdiff` constructed directly with `d`.
    """
    X = make_X(window, n_blanks_1, n_blanks_2, n_terms, n_series)

    reused = Fracdiff(0.42, window=window)
    # First pass with a different order; only its effect on the instance matters.
    reused.transform(X)
    reused.d = d
    actual = reused.transform(X)

    fresh = Fracdiff(d, window=window)
    expected = fresh.transform(X)

    assert np.allclose(actual, expected, equal_nan=True)
def invert_diff(df_forecast, columns_diff_dict):
    """
    Apply `Fracdiff` with the negated order to selected forecast columns.

    This is intended to undo a fractional differentiation that was applied to
    those columns earlier.

    Parameters
    ----------
    df_forecast : pandas.DataFrame
        Forecasts; not modified.
    columns_diff_dict : dict
        Maps a column name to a sequence whose item 0 is the order `d` that
        is negated here and whose item 1 is the `window` passed to `Fracdiff`.

    Returns
    -------
    df_fc : pandas.DataFrame
        Copy of `df_forecast` with the listed columns replaced by their
        transformed values; other columns are left as they were.
    """
    df_fc = df_forecast.copy()
    for col, params in columns_diff_dict.items():
        order, window = params[0], params[1]
        f = Fracdiff(d=-order, window=window)
        diff = f.fit_transform(df_forecast[col].values.reshape(-1, 1))
        # `reshape(-1)` always yields a 1-D array, whereas `squeeze()` would
        # collapse a single-row result to a 0-d scalar.
        values = diff.reshape(-1)
        # Label the values with the frame's own index. A bare `pd.Series`
        # gets a fresh 0..n-1 index, and column assignment aligns on labels,
        # which silently produces NaN for any frame whose index is not the
        # default range (e.g. a DatetimeIndex).
        df_fc[col] = pd.Series(values, index=df_forecast.index[: len(values)])
    return df_fc
def howto_spx():
    """
    Plot the '^GSPC' price series next to its 0.5th-order fractional
    differentiation computed with a window of 100.
    """
    spx = fetch_price('^GSPC')
    window = 100

    values = spx.values.reshape(-1, 1)
    differentiated = Fracdiff(0.5, window=window).transform(values)
    spxd = pd.Series(differentiated[:, 0], index=spx.index)

    # Both series are plotted without their first `window` observations.
    plot_spx(spx[window:], spxd[window:])
def test_tol_memory(d, tol_memory):
    """
    Check that the window chosen from `tol_memory` loses less memory than
    the tolerance.

    A RuntimeWarning raised while transforming or checking (the saturation
    case) is tolerated.
    """
    model = Fracdiff(d, window=None, tol_memory=tol_memory)
    try:
        model.transform(X)
        chosen_window = model.window_
        # For orders above one only the fractional part is checked.
        fractional = d - floor(d) if d > 1 else d
        assert abs(lost_memory(fractional, chosen_window)) < abs(tol_memory)
    except RuntimeWarning:
        # saturation
        pass
def test_transform_twice(d, window, n_blanks_1, n_blanks_2, n_terms, n_series):
    """
    Check that calling `transform` twice on the same instance and input
    gives the same result both times.
    """
    X = make_X(window, n_blanks_1, n_blanks_2, n_terms, n_series)
    model = Fracdiff(d, window=window)

    first_pass = model.transform(X)
    second_pass = model.transform(X)

    assert np.allclose(first_pass, second_pass, equal_nan=True)
def test_sample_pipeline(self, seed, n_samples, n_features, d):
    """
    Smoke test: a `Pipeline` with `Fracdiff` between a scaler and a linear
    regressor can be fitted on random data.
    """
    np.random.seed(seed)
    X = np.random.randn(n_samples, n_features)
    y = np.random.randn(n_samples)

    steps = [
        ("scaler", StandardScaler()),
        ("fracdiff", Fracdiff(d)),
        ("regressor", LinearRegression()),
    ]
    Pipeline(steps).fit(X, y)
def test_tol_coef(d, tol_coef):
    """
    Check the window chosen from `tol_coef`.

    For an integer order the window must equal `d + 1`; otherwise the last
    coefficient for that window must be smaller in magnitude than the
    tolerance. A RuntimeWarning (the saturation case) is tolerated.
    """
    model = Fracdiff(d, window=None, tol_coef=tol_coef)
    try:
        model.transform(X)
        chosen_window = model.window_
        if not d.is_integer():
            # For orders above one only the fractional part is checked.
            fractional = d - floor(d) if d > 1 else d
            assert abs(last_coef(fractional, chosen_window)) < abs(tol_coef)
        else:
            assert chosen_window == d + 1
    except RuntimeWarning:
        # saturation
        pass
def test_coef(d, window, n_blanks_1, n_blanks_2, n_terms, n_series):
    """
    Check that each output column reproduces `coef_`.

    For every series, the `n_terms` values starting at row
    `window + n_blanks_1` of the transformed array are compared with the
    estimator's coefficients.
    """
    # presumably make_X builds an input whose response exposes the
    # coefficients; verify against its definition
    X = make_X(window, n_blanks_1, n_blanks_2, n_terms, n_series)
    model = Fracdiff(d, window=window)
    transformed = model.transform(X)
    expected = model.coef_

    start = window + n_blanks_1
    for column in range(n_series):
        tail = transformed[start:, column]
        observed = tail[:n_terms]
        assert np.allclose(observed, expected)
def test_order(self, window, mode, precision):
    """
    Check that `FracdiffStat.d_` sits on the stationarity boundary.

    Every column of the transformed output must pass `self._is_stat`, while
    the same column differentiated with order `d_[i] - precision` must not.
    """
    np.random.seed(42)
    X = np.random.randn(1000, 10).cumsum(0)

    fs = FracdiffStat(mode=mode, window=window, precision=precision)
    fs.fit(X)
    X_st = fs.transform(X)

    # Differentiate each series with an order just below the fitted one.
    pieces = [np.empty_like(X_st[:, :0])]
    for i in range(X.shape[1]):
        below = Fracdiff(fs.d_[i] - precision, mode=mode, window=window)
        pieces.append(below.fit_transform(X[:, [i]]))
    X_ns = np.concatenate(pieces, 1)

    for i in range(X.shape[1]):
        assert self._is_stat(X_st[:, i])
        assert not self._is_stat(X_ns[:, i])
def test_transform(self, window, mode, precision):
    """
    Check `FracdiffStat.transform` on an array with n_features > 1.

    The output must match the column-wise results of `Fracdiff` run with the
    fitted order of each series.
    """
    np.random.seed(42)
    X = np.random.randn(100, 10).cumsum(0)

    fs = FracdiffStat(window=window, mode=mode, precision=precision).fit(X)
    out = fs.transform(X)

    pieces = [np.empty_like(out[:, :0])]
    for i in range(X.shape[1]):
        single = Fracdiff(fs.d_[i], mode=mode, window=window)
        pieces.append(single.fit_transform(X[:, [i]]))
    exp = np.concatenate(pieces, 1)

    assert_allclose(out, exp)
def test_transform(seed, n_samples, n_features, window):
    """
    Check `StationaryFracdiff.transform` on an array with n_features > 1.

    Ignoring the first `window` rows, the output must match the column-wise
    results of `Fracdiff` run with the fitted order of each series.
    """
    X = make_nonstationary(seed, n_samples, n_features)
    statfracdiff = StationaryFracdiff(window=window).fit(X)
    order = statfracdiff.order_

    actual = statfracdiff.transform(X)[window:, :]

    columns = []
    for i in range(n_features):
        single = Fracdiff(order[i], window).transform(X[:, [i]])
        columns.append(single[window:, :])
    expected = np.concatenate(columns, axis=1)

    assert np.allclose(actual, expected, equal_nan=True)
def test_order(seed, n_samples, n_features, window, precision):
    """
    Check that `StationaryFracdiff.order_` is the lowest order that makes
    the differentiation stationary, for an array with `n_features > 1`.

    Ignoring the first `window` rows, every transformed column must pass
    `is_stat`, while the column differentiated with `order_[i] - precision`
    must not.
    """
    X = make_nonstationary(seed, n_samples, n_features)
    statfracdiff = StationaryFracdiff(window=window, precision=precision)
    statfracdiff.fit(X)
    order = statfracdiff.order_

    stationary = statfracdiff.transform(X)[window:, :]

    columns = []
    for i in range(n_features):
        below = Fracdiff(order[i] - precision, window).transform(X[:, [i]])
        columns.append(below[window:, :])
    nonstationary = np.concatenate(columns, axis=1)

    for i in range(n_features):
        assert is_stat(stationary[:, i])
        assert not is_stat(nonstationary[:, i])
import seaborn

# Extend the import path by two directory levels before importing fracdiff;
# presumably so the import resolves to the repository checkout — TODO confirm.
sys.path.append("../..")
from fracdiff import Fracdiff  # noqa: E402


def fetch_spx():
    """
    Return the "Adj Close" column of "^GSPC" read from the "yahoo" source
    for 1999-10-01 through 2020-09-30.
    """
    return pandas_datareader.data.DataReader(
        "^GSPC", "yahoo", "1999-10-01", "2020-09-30"
    )["Adj Close"]


if __name__ == "__main__":
    s = fetch_spx()

    # 0.5th-order fractional differentiation with a 100-step window.
    f = Fracdiff(0.5, window=100, mode="valid")
    d = f.fit_transform(s.values.reshape(-1, 1)).reshape(-1)

    # Drop the first window - 1 prices so `s` and `d` share one index;
    # presumably "valid" mode omits exactly those rows — TODO confirm.
    s = s[100 - 1 :]
    d = pd.Series(d, index=s.index)

    seaborn.set_style("white")
    fig, ax_s = plt.subplots(figsize=(16, 8))
    # Second y-axis on the same x-axis, used for the differentiated series.
    ax_d = ax_s.twinx()

    plot_s = ax_s.plot(s, color="blue", linewidth=0.6, label="S&P 500 (left)")
    plot_d = ax_d.plot(
        d,
        color="orange",
        linewidth=0.6,
        label="S&P 500, 0.5th differentiation (right)",
    )
def test_small_n_samples():
    """
    Transforming 10 samples with a window of 100 must raise ValueError.
    """
    model = Fracdiff(window=100)
    too_short = np.zeros((10, 2))
    with pytest.raises(ValueError):
        model.transform(too_short)
def test_fracdiff_tol_coef(tol_coef):
    """
    Constructing and transforming with the given `tol_coef` must raise
    ValueError.
    """
    # Construction stays inside the context so an error from either step counts.
    with pytest.raises(ValueError):
        Fracdiff(tol_coef=tol_coef).transform(X)
def test_saturation():
    """
    With no explicit window and a very small `tol_memory`, `transform` is
    expected to raise RuntimeWarning.
    """
    tiny = 2 ** -20
    model = Fracdiff(0.5, window=None, tol_memory=tiny)
    with pytest.raises(RuntimeWarning):
        model.transform(X)
def test_fracdiff_noparams():
    """
    With `window`, `tol_coef` and `tol_memory` all None, constructing and
    transforming must raise ValueError.
    """
    # Construction stays inside the context so an error from either step counts.
    with pytest.raises(ValueError):
        Fracdiff(window=None, tol_coef=None, tol_memory=None).transform(X)
def test_fracdiff_tol_memory(tol_memory):
    """
    Constructing and transforming with the given `tol_memory` must raise
    ValueError.
    """
    # Construction stays inside the context so an error from either step counts.
    with pytest.raises(ValueError):
        Fracdiff(tol_memory=tol_memory).transform(X)
def test_fracdiff_d(d):
    """
    Constructing and transforming with the given order `d` must raise
    ValueError.
    """
    # Construction stays inside the context so an error from either step counts.
    with pytest.raises(ValueError):
        model = Fracdiff(d)
        model.transform(X)
def test_fracdiff_window(window):
    """
    Constructing and transforming with the given `window` must raise
    ValueError.
    """
    # Construction stays inside the context so an error from either step counts.
    with pytest.raises(ValueError):
        model = Fracdiff(window=window)
        model.transform(X)
def test_repr(self):
    """
    Check the exact text produced by `repr` for a fully specified instance.
    """
    instance = Fracdiff(0.5, window=10, mode="full", window_policy="fixed")
    assert repr(instance) == (
        "Fracdiff(d=0.5, window=10, mode=full, window_policy=fixed)"
    )
def test_transform(self, d, window, mode):
    """
    Check that `Fracdiff.fit_transform` equals `fdiff` applied along axis 0
    with the same order, window and mode.
    """
    np.random.seed(42)
    X = np.random.randn(100, 200)

    estimator = Fracdiff(d=d, window=window, mode=mode)
    expected = fdiff(X, n=d, axis=0, window=window, mode=mode)
    actual = estimator.fit_transform(X)

    assert_array_equal(actual, expected)
def test_sample_fit_transform(self, seed, n_samples, n_features, d):
    """
    Smoke test: `fit_transform` runs on random data; the result is not
    inspected.
    """
    np.random.seed(seed)
    X = np.random.randn(n_samples, n_features)
    model = Fracdiff(d)
    model.fit_transform(X)
def last_coef(d, window):
    """
    Return the last entry of `coef_` for a `Fracdiff` of order `d` fitted
    with the given `window`.
    """
    fitted = Fracdiff(d, window=window)._fit()
    return fitted.coef_[-1]
def lost_memory(d, window):
    """
    Return the sum of the coefficients from index `window + 1` onward.

    The coefficients come from a `Fracdiff` of order `d` fitted with a
    window of `LARGE_NUMBER`.
    """
    fitted = Fracdiff(d, window=LARGE_NUMBER)._fit()
    tail = fitted.coef_[window + 1:]
    return np.sum(tail)
def diff(d):
    """
    Return `x` differentiated with order `d` as a flat array.

    `x`, `self.window` and `self.mode` are taken from the enclosing scope.
    """
    model = Fracdiff(d, window=self.window, mode=self.mode)
    column = x.reshape(-1, 1)
    transformed = model.fit_transform(column)
    return transformed.reshape(-1)
def test_coef(d, window, n_terms):
    """
    Check that the fitted `coef_` matches the reference values returned by
    `get_coefs(d, n_terms)`.
    """
    fitted = Fracdiff(d, window=window)._fit()
    actual = fitted.coef_
    expected = get_coefs(d, n_terms)
    assert np.allclose(actual, expected)