def priceActual():
    for k, v in priceDic.items():
        print(k, v)
        series = pd.read_csv('../Data/Final/Wholesale/' + k, names=[0.1],
                             index_col=0, header=None)
        near_file = v[:-4] + '.npy'
        near = np.load('../Data/Results/Near/' + near_file)
        daysToForecast = 30

        series_train = np.squeeze(series.values)
        n = len(series_train)
        near_train = near[:n]
        near_test = near[-30:]

        # Fourier terms for the yearly cycle, scaled elementwise
        trans = FourierFeaturizer(365.25, 1)
        y_prime, exogen = trans.fit_transform(series_train)
        exogen = exogen.mul(pd.Series(series_train), axis=0)

        futureExog = trans.transform(y=series_train, n_periods=30)
        futureExog = pd.DataFrame(futureExog[1])
        futureExog = futureExog.mul(pd.Series(near_test), axis=0)

        exogen['near'] = near_train
        futureExog['near'] = near_test

        # print('MODel searching')
        model = pm.arima.auto_arima(series_train, exogenous=exogen,
                                    start_p=0, d=None, start_q=0,
                                    max_p=3, max_d=1, max_q=3,
                                    start_P=0, D=None, start_Q=0,
                                    max_P=2, max_D=1, max_Q=2,
                                    suppress_warnings=True, seasonal=True,
                                    max_order=4, m=7, stepwise=True)
        # Refit with the same exogenous features used during the search;
        # refitting without them would break the exogenous forecast below.
        model.fit(series_train, exogenous=exogen)
        pred = model.predict(daysToForecast, exogenous=futureExog)

        series = np.concatenate((series_train, pred), axis=0)
        series = pd.DataFrame(series)
        series.index = pd.date_range(start='2006-01-01', periods=len(series))

        fileName = '../Data/Results/Actual/' + str(k)
        # print(fileName)
        series.to_csv(fileName)
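# The priceActual() function above assumes module-level context that is not
# shown in the snippet. A minimal sketch of that assumed setup follows; the
# priceDic entry is a hypothetical placeholder, only the imports are standard.
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.preprocessing import FourierFeaturizer

# Maps each wholesale-price CSV to the file whose '.npy' counterpart holds the
# pre-computed "near" regressor (placeholder entry).
priceDic = {
    'ExampleMarketPrice.csv': 'ExampleMarketNear.csv',
}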
def test_pipeline_behavior():
    wineind = load_wineind()
    train, test = wineind[:125], wineind[125:]

    pipeline = Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", AutoARIMA(seasonal=False, stepwise=True,
                            suppress_warnings=True, maxiter=3,
                            error_action='ignore'))
    ])

    # Quick assertions on indexing
    assert len(pipeline) == 2

    pipeline.fit(train)
    preds = pipeline.predict(5)
    assert preds.shape[0] == 5
    assert pipeline._final_estimator.model_.fit_with_exog_

    # Assert that when the n_periods kwarg is set manually and incorrectly for
    # the fourier transformer, we get a ValueError
    kwargs = {
        "fourier__n_periods": 10
    }

    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]
class TestIllegal:

    def test_non_unique_names(self):
        # Will fail since the same name repeated twice
        with pytest.raises(ValueError) as ve:
            Pipeline([
                ("stage", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0)))
            ])
        assert "not unique" in pytest_error_str(ve)

    def test_names_in_params(self):
        # Will fail because 'steps' is a param of Pipeline
        with pytest.raises(ValueError) as ve:
            Pipeline([
                ("steps", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0)))
            ])
        assert "names conflict" in pytest_error_str(ve)

    def test_names_double_underscore(self):
        # Will fail since the "__" is reserved for parameter names
        with pytest.raises(ValueError) as ve:
            Pipeline([
                ("stage__1", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0)))
            ])
        assert "must not contain __" in pytest_error_str(ve)

    def test_non_transformer_in_steps(self):
        # Will fail since the first stage is not a transformer
        with pytest.raises(TypeError) as ve:
            Pipeline([
                ("stage1", (lambda *args, **kwargs: None)),  # Fail
                ("stage2", AutoARIMA())
            ])
        assert "instances of BaseTransformer" in pytest_error_str(ve)

    @pytest.mark.parametrize(
        'stages', [
            # Nothing BUT a transformer
            [("stage1", BoxCoxEndogTransformer())],

            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ]
    )
    def test_bad_last_stage(self, stages):
        # Will fail since the last stage is not an estimator
        with pytest.raises(TypeError) as ve:
            Pipeline(stages)
        assert "Last step of Pipeline should be" in pytest_error_str(ve)
def forecast(series, near):
    train_near = np.squeeze(near.values[:-30])
    train_series = np.squeeze(series.values[:len(train_near)])
    test_near = np.squeeze(near.values[-30:])
    test_series = np.squeeze(series.values[-30:])

    df = pd.DataFrame(columns=['aic', 'nonSeasonal', 'seasonal', 'k'])
    for i in range(25):
        print('value of i is:', i)
        # Randomly drawn candidate orders (ch, nonSeasonal and seasonal are
        # defined at module level).
        nonSeasonalParams = ch(nonSeasonal)
        seasonalParams = ch(seasonal)
        val = sum(list(nonSeasonalParams)) + sum(list(seasonalParams))
        if val > 8:
            continue
        seasonalParams = seasonalParams + (7,)
        # print(nonSeasonalParams, seasonalParams)
        try:
            # Fourier terms are computed but only the 'near' series is passed
            # to SARIMAX as an exogenous regressor.
            trans = FourierFeaturizer(365.25, 1)
            y_prime, exogen = trans.fit_transform(train_series)
            # exogen = exogen.mul(pd.Series(train_series), axis=0)
            exogen['near'] = train_near
            model = sm.tsa.statespace.SARIMAX(endog=train_series,
                                              exog=train_near,
                                              order=nonSeasonalParams,
                                              seasonal_order=seasonalParams,
                                              initialization='approximate_diffuse',
                                              enforce_stationarity=False)
            res = model.fit(disp=False)
            # print(res.aic)
            to_append = [res.aic, nonSeasonalParams, seasonalParams, 1]
            a_series = pd.Series(to_append, index=df.columns)
            df = df.append(a_series, ignore_index=True)
        except Exception:
            print('inside except block now.....')
            x = pd.Series([10000000, (1, 1, 1), (1, 1, 1), 1], index=df.columns)
            df = df.append(x, ignore_index=True)

    # Fallback row so the frame is never empty when selecting the minimum AIC.
    x = pd.Series([10000000, (1, 1, 1), (1, 1, 1), 1], index=df.columns)
    df = df.append(x, ignore_index=True)

    print('df is:', df)
    dx = df[df.aic == df.aic.min()]
    dx.reset_index(inplace=True, drop=True)
    print('best parameters are:', dx)
    value = [dx.iloc[0][0], dx.iloc[0][1], dx.iloc[0][2], dx.iloc[0][3]]
    print('final parameters are:', value)
    return value
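# forecast() above also leans on names defined elsewhere in its module: the
# imports plus `ch`, `nonSeasonal` and `seasonal`. The grids below are a
# plausible reconstruction, not the original project's values.
import random
from itertools import product

import numpy as np
import pandas as pd
import statsmodels.api as sm
from pmdarima.preprocessing import FourierFeaturizer

nonSeasonal = list(product(range(4), range(2), range(4)))  # candidate (p, d, q) orders
seasonal = list(product(range(3), range(2), range(3)))     # candidate (P, D, Q) orders
ch = random.choice                                         # draws one candidate per iteration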
def test_order_does_not_matter_with_date_transformer():
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
        train_test_split(y_dates, X_dates, test_size=15)

    pipeline_a = Pipeline([
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
        ("arima", AutoARIMA(seasonal=False, stepwise=True,
                            suppress_warnings=True, maxiter=3,
                            error_action='ignore'))
    ]).fit(train_y_dates, train_X_dates)

    Xt_a = pipeline_a.transform(exogenous=test_X_dates)
    pred_a = pipeline_a.predict(exogenous=test_X_dates)

    pipeline_b = Pipeline([
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
        ("arima", AutoARIMA(seasonal=False, stepwise=True,
                            suppress_warnings=True, maxiter=3,
                            error_action='ignore'))
    ]).fit(train_y_dates, train_X_dates)

    Xt_b = pipeline_b.transform(exogenous=test_X_dates)
    pred_b = pipeline_b.predict(exogenous=test_X_dates)

    # feature order in A should differ from that in B
    assert pipeline_a.x_feats_[0].startswith("FOURIER")
    assert pipeline_a.x_feats_[-1].startswith("DATE")
    assert pipeline_b.x_feats_[0].startswith("DATE")
    assert pipeline_b.x_feats_[-1].startswith("FOURIER")

    # columns should be identical once ordered appropriately
    assert Xt_a.equals(Xt_b[pipeline_a.x_feats_])

    # forecasts should be identical
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
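# The pmdarima-style test snippets above (test_pipeline_behavior, TestIllegal
# and test_order_does_not_matter_with_date_transformer) depend on shared
# imports and fixtures. This is a hedged sketch of that setup; the fixture
# values (y_dates, X_dates) are illustrative, not the originals.
import pandas as pd
import pytest
from numpy.testing import assert_array_almost_equal

from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import load_wineind
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import (
    BoxCoxEndogTransformer, DateFeaturizer, FourierFeaturizer)

wineind = load_wineind()
y_dates = wineind
X_dates = pd.DataFrame({
    'date': pd.date_range('1980-01-01', periods=wineind.shape[0], freq='MS')
})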
def basic_pipeline(data):
    pipeline = Pipeline(steps=[
        ("fourier", FourierFeaturizer(k=3, m=7)),
        ("arima", AutoARIMA(out_of_sample_size=60)),
    ])
    return GroupedPmdarima(pipeline).fit(
        data.df,
        data.key_columns,
        "y",
        "ds",
    )
def _tune(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
    """
    Tune hyperparameters of the model.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data,
        4 or "quarterly" for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly
        data, 24 or "hourly" for hourly data, 52 or "weekly" for weekly data. First-letter
        abbreviations of strings work as well ("a", "q", "d", "m", "h" and "w", respectively).
        Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param x: pd.DataFrame or 2-D np.array, exogenous predictors, optional
    :param metric: Str, the metric used for model selection. One of "mse" (mean squared error),
        "mae" (mean absolute error).
    :param val_size: Int, the number of most recent observations to use as validation set for tuning.
    :param verbose: Boolean, True for printing additional info while tuning.
    :return: None
    """
    self.period = data_utils.period_to_int(period) if type(period) == str else period
    val_size = int(len(y) * .1) if val_size is None else val_size

    pipe = pipeline.Pipeline([
        ("fourier", FourierFeaturizer(self.period, self.period / 2)),  # TODO: Tune no. of Fourier terms as well?
        ("arima", auto_arima(y, m=self.period, seasonal=False, d=None,
                             information_criterion='oob', maxiter=100,
                             error_action='ignore', suppress_warnings=True,
                             stepwise=True, max_order=None,
                             out_of_sample_size=val_size, scoring=metric,
                             exogenous=x))
    ])

    self.params.update(pipe.steps[1][1].get_params())
    self.params["tuned"] = True
def fit(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
    """
    Build the model using best-tuned hyperparameter values.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data,
        4 or "quarterly" for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly
        data, 24 or "hourly" for hourly data, 52 or "weekly" for weekly data. First-letter
        abbreviations of strings work as well ("a", "q", "d", "m", "h" and "w", respectively).
        Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param x: pd.DataFrame or 2-D np.array, exogenous predictors, optional
    :param metric: Str, the metric used for model selection. One of "mse" (mean squared error),
        "mae" (mean absolute error).
    :param val_size: Int, the number of most recent observations to use as validation set for tuning.
    :param verbose: Boolean, True for printing additional info while tuning.
    :return: None
    """
    self.y = y
    self.name = "Fourier ARIMA"
    self.key = "fourier_sarima"
    self._tune(y=y, period=period, x=x, metric=metric, val_size=val_size, verbose=verbose)

    pipe = pipeline.Pipeline([
        ("fourier", FourierFeaturizer(self.period, self.period / 2)),
        ("arima", arima.ARIMA(maxiter=100,
                              order=self.params["order"],
                              seasonal_order=self.params["seasonal_order"],
                              suppress_warnings=True))
    ])
    self.model = pipe.fit(y, exogenous=x)
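# The _tune/fit pair above implements a two-phase fit: auto_arima picks an
# order using out-of-sample scoring, then a fixed-order ARIMA is refit inside
# a FourierFeaturizer pipeline. The standalone sketch below collapses the two
# phases into a single AutoARIMA step for brevity; the data and parameters are
# illustrative only, not taken from the original class.
import numpy as np
from pmdarima import arima, pipeline
from pmdarima.preprocessing import FourierFeaturizer

rng = np.random.RandomState(42)
y = 10 + np.sin(np.arange(120) * 2 * np.pi / 12) + rng.rand(120)  # toy monthly series

pipe = pipeline.Pipeline([
    # Fourier terms carry the seasonality so the ARIMA itself can stay non-seasonal
    ("fourier", FourierFeaturizer(12, 4)),
    ("arima", arima.AutoARIMA(seasonal=False, stepwise=True,
                              suppress_warnings=True, error_action='ignore'))
])
pipe.fit(y)
forecast = pipe.predict(n_periods=12)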
import numpy as np
import pytest
from unittest import mock

from pmdarima.arima import ARIMA
from pmdarima.datasets import load_wineind
from pmdarima.model_selection import (
    RollingForecastCV, SlidingWindowForecastCV, cross_val_score)
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer

y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1)),
    ARIMA(order=(1, 1, 2),
          seasonal_order=(0, 1, 1, 12),
          suppress_warnings=True),
    Pipeline([("fourier", FourierFeaturizer(m=12)),
              ("arima", ARIMA(order=(2, 1, 0), maxiter=3))])
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    scores = cross_val_score(est, y, exogenous=exog,
                             scoring='mean_squared_error',
                             cv=cv, verbose=verbose)
    assert isinstance(scores, np.ndarray)
@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
        ARIMA(order=(1, 1, 2),
              seasonal_order=(0, 1, 1, 12),
              maxiter=2,
              simple_differencing=True,
              suppress_warnings=True),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", ARIMA(order=(2, 1, 0),
                            maxiter=2,
                            simple_differencing=True))
        ])
    ]
)
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('X', [None, exogenous])
def test_cv_scores(cv, est, verbose, X):
    scores = cross_val_score(
        est, y, X=X, scoring='mean_squared_error', cv=cv, verbose=verbose)
    assert isinstance(scores, np.ndarray)
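# Note: the two test_cv_scores variants above differ mainly in the keyword used
# to pass exogenous features to cross_val_score. Older pmdarima releases accept
# `exogenous=`, while 1.8+ renamed it to `X=`; equivalent calls look like:
#
#     cross_val_score(est, y, exogenous=exog, cv=cv)   # older pmdarima
#     cross_val_score(est, y, X=X, cv=cv)              # pmdarima >= 1.8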
                     index_col=0, header=None)
near = pd.read_csv('../Data/Final/Wholesale/YeolaPrice.csv',
                   names=[0.1], index_col=0, header=None)

train_near = np.squeeze(near.values[:-30])
train_series = np.squeeze(series.values[:len(train_near)])
test_near = np.squeeze(near.values[-30:])
test_series = np.squeeze(series.values[-30:])

for k in range(1, 4):
    print(k)
    trans = FourierFeaturizer(365.25, k)
    y_prime, exogen = trans.fit_transform(train_series)
    exogen = exogen.mul(pd.Series(train_series), axis=0)
    exogen['near'] = train_near
    model = pm.arima.auto_arima(train_series,
                                exogenous=pd.DataFrame(exogen),
                                start_p=0, d=None, start_q=0,
                                max_p=5, max_d=2, max_q=5,
                                start_P=0, D=None, start_Q=0,
                                max_P=5,