Exemple #1
0
def priceActual():
    """Extend every wholesale price series with a forecast and save it.

    For each ``(k, v)`` pair in the module-level ``priceDic``:

    * the price series is read from ``'../Data/Final/Wholesale/' + k``;
    * the companion "near" array is loaded from
      ``'../Data/Results/Near/' + v[:-4] + '.npy'``;
    * Fourier terms (``FourierFeaturizer(365.25, 1)``) are scaled row-wise
      and combined with the "near" values into an exogenous matrix;
    * ``auto_arima`` selects a seasonal model (``m=7``) which then predicts
      ``daysToForecast`` steps ahead;
    * history followed by the forecast is written to
      ``'../Data/Results/Actual/' + k`` with a date index starting at
      2006-01-01.

    :return: None
    """
    # Single source of truth for the horizon; it drives the slice of ``near``,
    # the number of future Fourier rows and the prediction length.
    daysToForecast = 30

    for k, v in priceDic.items():
        print(k, v)
        series = pd.read_csv('../Data/Final/Wholesale/' + k,
                             names=[0.1], index_col=0, header=None)
        near = np.load('../Data/Results/Near/' + v[:-4] + '.npy')

        series_train = np.squeeze(series.values)
        n = len(series_train)
        near_train = near[:n]
        near_test = near[-daysToForecast:]

        trans = FourierFeaturizer(365.25, 1)
        y_prime, exogen = trans.fit_transform(series_train)
        exogen = exogen.mul(pd.Series(series_train), axis=0)

        # NOTE(review): training Fourier terms are scaled by the series
        # itself while the future ones are scaled by ``near_test`` - confirm
        # this asymmetry is intended.
        _, futureExog = trans.transform(y=series_train,
                                        n_periods=daysToForecast)
        futureExog = pd.DataFrame(futureExog)
        futureExog = futureExog.mul(pd.Series(near_test), axis=0)

        exogen['near'] = near_train
        futureExog['near'] = near_test

        model = pm.arima.auto_arima(
            series_train, exogenous=exogen,
            start_p=0, d=None, start_q=0, max_p=3, max_d=1, max_q=3,
            start_P=0, D=None, start_Q=0, max_P=2, max_D=1, max_Q=2,
            suppress_warnings=True, seasonal=True, max_order=4, m=7,
            stepwise=True)
        # Re-fit with the same exogenous matrix the search used. Fitting on
        # the series alone would drop the regressors from the model while
        # predict() below still supplies them.
        model.fit(series_train, exogenous=exogen)
        pred = model.predict(daysToForecast, exogenous=futureExog)

        combined = pd.DataFrame(np.concatenate((series_train, pred), axis=0))
        combined.index = pd.date_range(start='2006-01-01',
                                       periods=len(combined))
        combined.to_csv('../Data/Results/Actual/' + str(k))
Exemple #2
0
def test_pipeline_behavior():
    """Fit, predict with, and update a Fourier + AutoARIMA pipeline."""
    wineind = load_wineind()
    split_at = 125
    train = wineind[:split_at]
    test = wineind[split_at:]

    fourier_step = FourierFeaturizer(m=12)
    arima_step = AutoARIMA(seasonal=False,
                           stepwise=True,
                           suppress_warnings=True,
                           maxiter=3,
                           error_action='ignore')
    pipeline = Pipeline([("fourier", fourier_step), ("arima", arima_step)])

    # The pipeline reports one entry per stage
    assert len(pipeline) == 2

    pipeline.fit(train)
    horizon = 5
    preds = pipeline.predict(horizon)
    assert preds.shape[0] == horizon

    # The Fourier terms must have reached the final estimator as exog
    assert pipeline._final_estimator.model_.fit_with_exog_

    # A manually supplied n_periods for the fourier stage that disagrees
    # with the requested horizon has to be rejected with a ValueError
    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **{"fourier__n_periods": 10})
    assert "'n_periods'" in pytest_error_str(ve)

    # Updating with the held-out samples should work...
    pipeline.update(test, maxiter=5)

    # ...and the fourier stage should now have seen the full series
    fitted_fourier = pipeline.steps_[0][1]
    assert fitted_fourier.n_ == wineind.shape[0]
Exemple #3
0
class TestIllegal:
    """Pipeline construction must reject malformed step definitions."""

    def test_non_unique_names(self):
        # Two stages sharing one name is an error
        with pytest.raises(ValueError) as ve:
            steps = [
                ("stage", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0))),
            ]
            Pipeline(steps)

        message = pytest_error_str(ve)
        assert "not unique" in message

    def test_names_in_params(self):
        # 'steps' is already a Pipeline parameter, so it cannot name a stage
        with pytest.raises(ValueError) as ve:
            steps = [
                ("steps", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0))),
            ]
            Pipeline(steps)

        message = pytest_error_str(ve)
        assert "names conflict" in message

    def test_names_double_underscore(self):
        # "__" is reserved for addressing nested parameters
        with pytest.raises(ValueError) as ve:
            steps = [
                ("stage__1", BoxCoxEndogTransformer()),
                ("stage", ARIMA(order=(0, 0, 0))),
            ]
            Pipeline(steps)

        message = pytest_error_str(ve)
        assert "must not contain __" in message

    def test_non_transformer_in_steps(self):
        # A plain callable in an intermediate position is not a transformer
        with pytest.raises(TypeError) as ve:
            steps = [
                ("stage1", (lambda *args, **kwargs: None)),  # Fail
                ("stage2", AutoARIMA()),
            ]
            Pipeline(steps)

        message = pytest_error_str(ve)
        assert "instances of BaseTransformer" in message

    @pytest.mark.parametrize(
        'stages', [
            # A lone transformer with no estimator after it
            [("stage1", BoxCoxEndogTransformer())],

            # Two transformers, still no estimator at the end
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ]
    )
    def test_bad_last_stage(self, stages):
        # The final stage has to be an estimator
        with pytest.raises(TypeError) as ve:
            Pipeline(stages)

        message = pytest_error_str(ve)
        assert "Last step of Pipeline should be" in message
Exemple #4
0
def forecast(series, near):
    """Randomly search SARIMAX orders and return the lowest-AIC candidate.

    Up to 25 candidates are drawn with ``ch`` from the module-level
    ``nonSeasonal`` and ``seasonal`` collections. Candidates whose orders sum
    to more than 8 are skipped. Every remaining candidate is fitted with a
    seasonal period of 7 on the training part of ``series``, using the
    training part of ``near`` as the exogenous regressor. A candidate that
    raises during fitting is recorded with a sentinel AIC of 10000000, and
    one sentinel row is always added so that a result exists even when every
    draw was skipped.

    :param series: object exposing ``.values``; the target series.
    :param near: object exposing ``.values``; the regressor. All but its
        last 30 rows are used for training.
    :return: list ``[aic, nonSeasonal order, seasonal order, k]`` describing
        the first row with the minimal AIC.
    """
    columns = ['aic', 'nonSeasonal', 'seasonal', 'k']
    # Sentinel used both for failed fits and as the guaranteed last row.
    fallback = [10000000, (1, 1, 1), (1, 1, 1), 1]

    train_near = np.squeeze(near.values[:-30])
    train_series = np.squeeze(series.values[:len(train_near)])

    rows = []
    for i in range(25):
        print('value of i is:', i)
        nonSeasonalParams = ch(nonSeasonal)
        seasonalParams = ch(seasonal)
        # Skip candidates whose total order is too large.
        if sum(nonSeasonalParams) + sum(seasonalParams) > 8:
            continue
        seasonalParams = seasonalParams + (7,)
        try:
            # NOTE(review): the Fourier frame is built but only
            # ``train_near`` is handed to SARIMAX as ``exog`` - confirm
            # whether ``exogen`` was meant to be used. It stays inside the
            # ``try`` so a failure here is still recorded as a failed
            # candidate.
            trans = FourierFeaturizer(365.25, 1)
            y_prime, exogen = trans.fit_transform(train_series)
            exogen['near'] = train_near
            model = sm.tsa.statespace.SARIMAX(
                endog=train_series,
                exog=train_near,
                order=nonSeasonalParams,
                seasonal_order=seasonalParams,
                initialization='approximate_diffuse',
                enforce_stationarity=False)
            res = model.fit(disp=False)
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C and
            # SystemExit are no longer swallowed by the search loop.
            print('inside except block now.....')
            rows.append(list(fallback))
        else:
            rows.append([res.aic, nonSeasonalParams, seasonalParams, 1])

    rows.append(list(fallback))

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    # dtype=object keeps the order tuples and raw AIC values untouched.
    df = pd.DataFrame(rows, columns=columns, dtype=object)
    print('df is :', df)

    dx = df[df.aic == df.aic.min()].reset_index(drop=True)
    print('baest parameters are:', dx)

    best = dx.iloc[0]
    value = [best['aic'], best['nonSeasonal'], best['seasonal'], best['k']]
    print('final parameters are:', value)
    return value
Exemple #5
0
def test_order_does_not_matter_with_date_transformer():
    """Swapping the two featurizers only permutes the feature columns."""
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
        train_test_split(y_dates, X_dates, test_size=15)

    def fit_and_apply(featurizer_order):
        # Build the featurizer stages in the requested order, append the
        # ARIMA stage, fit on the training split and apply to the test exog.
        factories = {
            'fourier': lambda: FourierFeaturizer(m=3, prefix="FOURIER"),
            'dates': lambda: DateFeaturizer(column_name="date",
                                            prefix="DATE"),
        }
        steps = [(name, factories[name]()) for name in featurizer_order]
        steps.append(
            ("arima",
             AutoARIMA(seasonal=False,
                       stepwise=True,
                       suppress_warnings=True,
                       maxiter=3,
                       error_action='ignore')))
        fitted = Pipeline(steps).fit(train_y_dates, train_X_dates)
        transformed = fitted.transform(exogenous=test_X_dates)
        predicted = fitted.predict(exogenous=test_X_dates)
        return fitted, transformed, predicted

    pipeline_a, Xt_a, pred_a = fit_and_apply(('fourier', 'dates'))
    pipeline_b, Xt_b, pred_b = fit_and_apply(('dates', 'fourier'))

    # Feature ordering follows stage ordering: A is fourier-then-dates...
    feats_a = pipeline_a.x_feats_
    assert feats_a[0].startswith("FOURIER")
    assert feats_a[-1].startswith("DATE")

    # ...while B is dates-then-fourier
    feats_b = pipeline_b.x_feats_
    assert feats_b[0].startswith("DATE")
    assert feats_b[-1].startswith("FOURIER")

    # Re-ordering B's columns to A's layout must give identical frames
    assert Xt_a.equals(Xt_b[feats_a])

    # The forecasts must agree as well
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
Exemple #6
0
def basic_pipeline(data):
    """Fit a grouped Fourier(k=3, m=7) + AutoARIMA pipeline on ``data``.

    ``data.df`` and ``data.key_columns`` are forwarded to
    ``GroupedPmdarima.fit`` together with the column names "y" and "ds".
    The result of that ``fit`` call is returned.
    """
    stages = [
        ("fourier", FourierFeaturizer(k=3, m=7)),
        ("arima", AutoARIMA(out_of_sample_size=60)),
    ]
    grouped_model = GroupedPmdarima(Pipeline(steps=stages))
    return grouped_model.fit(data.df, data.key_columns, "y", "ds")
 def _tune(self,
           y,
           period,
           x=None,
           metric="mse",
           val_size=None,
           verbose=False):
     """
     Tune hyperparameters of the model.
     :param y: pd.Series or 1-D np.array, time series to predict.
     :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
     for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
     data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
     "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
     :param x: exogenous predictors, optional; forwarded to ``auto_arima`` as ``exogenous``.
     :param metric: Str, the metric used for model selection. One of "mse" (mean squared error), "mae" (mean absolute
     error).
     :param val_size: Int, the number of most recent observations to use as validation set for tuning. Defaults to
     10% of ``len(y)`` (truncated to an integer) when None.
     :param verbose: Boolean, not referenced by this method.
     :return: None
     """
     # isinstance also accepts str subclasses, unlike an exact type comparison.
     self.period = (data_utils.period_to_int(period)
                    if isinstance(period, str) else period)
     val_size = int(len(y) * .1) if val_size is None else val_size
     # NOTE(review): auto_arima is invoked directly on ``y`` while the step list
     # is being built, and the resulting pipeline is never fitted in this
     # method, so the Fourier step does not take part in the search - confirm
     # this is intended.
     pipe = pipeline.Pipeline([
         # Floor division keeps the number of Fourier terms whole for odd
         # periods (e.g. 7 -> 3 instead of 3.5).
         ("fourier", FourierFeaturizer(
             self.period,
             self.period // 2)),  # TODO: Tune no. of Fourier terms as well?
         ("arima",
          auto_arima(y,
                     m=self.period,
                     seasonal=False,
                     d=None,
                     information_criterion='oob',
                     maxiter=100,
                     error_action='ignore',
                     suppress_warnings=True,
                     stepwise=True,
                     max_order=None,
                     out_of_sample_size=val_size,
                     scoring=metric,
                     exogenous=x))
     ])
     # Store the selected model's parameters; fit() reads them back from
     # self.params.
     self.params.update(pipe.steps[1][1].get_params())
     self.params["tuned"] = True
 def fit(self,
         y,
         period,
         x=None,
         metric="mse",
         val_size=None,
         verbose=False):
     """
     Build the model using best-tuned hyperparameter values.

     Runs ``self._tune`` first, then fits a Fourier + ARIMA pipeline with the tuned ``order`` and ``seasonal_order``
     and stores the fitted pipeline in ``self.model``.
     :param y: pd.Series or 1-D np.array, time series to predict.
     :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
     for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
     data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
     "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
     :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
     :param metric: Str, the metric used for model selection. One of "mse" (mean squared error), "mae" (mean absolute
     error).
     :param val_size: Int, the number of most recent observations to use as validation set for tuning.
     :param verbose: Boolean, forwarded to ``self._tune``.
     :return: None
     """
     self.y = y
     self.name = "Fourier ARIMA"
     self.key = "fourier_sarima"
     # _tune sets self.period and fills self.params with the selected orders.
     self._tune(y=y,
                period=period,
                x=x,
                metric=metric,
                val_size=val_size,
                verbose=verbose)
     # The number of Fourier terms must be a whole number no larger than half
     # the period. True division gave 3.5 for a period of 7, which expands to
     # four harmonics instead of three.
     n_terms = self.period // 2
     pipe = pipeline.Pipeline([
         ("fourier", FourierFeaturizer(self.period, n_terms)),
         ("arima",
          arima.ARIMA(maxiter=100,
                      order=self.params["order"],
                      seasonal_order=self.params["seasonal_order"],
                      suppress_warnings=True))
     ])
     self.model = pipe.fit(y, exogenous=x)
Exemple #9
0
import numpy as np
from unittest import mock

# Module-level fixtures: ``y`` is whatever ``load_wineind()`` returns and
# ``exogenous`` is a seeded (RandomState(1)) uniform random matrix with one
# row per element of ``y`` and two columns, so it is identical on every run.
y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1)),
    ARIMA(
        order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), suppress_warnings=True),
    Pipeline([("fourier", FourierFeaturizer(m=12)),
              ("arima", ARIMA(order=(2, 1, 0), maxiter=3))])
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    """cross_val_score returns an ndarray for every parameter combination."""
    options = {
        'exogenous': exog,
        'scoring': 'mean_squared_error',
        'cv': cv,
        'verbose': verbose,
    }
    scores = cross_val_score(est, y, **options)
    assert isinstance(scores, np.ndarray)


@pytest.mark.parametrize('cv', [

@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
        ARIMA(order=(1, 1, 2),
              seasonal_order=(0, 1, 1, 12),
              maxiter=2,
              simple_differencing=True,
              suppress_warnings=True),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", ARIMA(order=(2, 1, 0),
                            maxiter=2,
                            simple_differencing=True))
        ])
    ]
)
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('X', [None, exogenous])
def test_cv_scores(cv, est, verbose, X):
    """cross_val_score returns an ndarray for every parameter combination."""
    options = {
        'X': X,
        'scoring': 'mean_squared_error',
        'cv': cv,
        'verbose': verbose,
    }
    scores = cross_val_score(est, y, **options)
    assert isinstance(scores, np.ndarray)

Exemple #11
0
                     index_col=0,
                     header=None)
# Load the regressor series from a header-less CSV: the first column becomes
# the index and the single remaining column is labelled with the float 0.1.
near = pd.read_csv('../Data/Final/Wholesale/YeolaPrice.csv',
                   names=[0.1],
                   index_col=0,
                   header=None)

# Training split: everything except the last 30 rows of ``near``, and the
# same number of leading rows from ``series`` (defined above this excerpt).
train_near = np.squeeze(near.values[:-30])
train_series = np.squeeze(series.values[:len(train_near)])

# Hold-out split: the last 30 rows of each input.
test_near = np.squeeze(near.values[-30:])
test_series = np.squeeze(series.values[-30:])

for k in range(1, 4):
    print(k)
    trans = FourierFeaturizer(365.25, k)
    y_prime, exogen = trans.fit_transform(train_series)
    exogen = exogen.mul(pd.Series(train_series), axis=0)
    exogen['near'] = train_near
    model = pm.arima.auto_arima(train_series,
                                exogenous=pd.DataFrame(exogen),
                                start_p=0,
                                d=None,
                                start_q=0,
                                max_p=5,
                                max_d=2,
                                max_q=5,
                                start_P=0,
                                D=None,
                                start_Q=0,
                                max_P=5,