def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    # Now save it twice
    file_a = 'first.pkl'
    file_b = 'second.pkl'

    try:
        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)

    # Always remove in case we fail in try, leaving residual files
    finally:
        os.unlink(file_a)
        os.unlink(file_b)
def test_issue_286():
    mod = ARIMA(order=(1, 1, 2))
    mod.fit(wineind)

    with pytest.raises(ValueError) as ve:
        mod.predict_in_sample(start=0)
    assert "In-sample predictions undefined for" in pytest_error_str(ve)
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    with tempfile.TemporaryDirectory() as tdir:
        # Now save it twice
        file_a = os.path.join(tdir, 'first.pkl')
        file_b = os.path.join(tdir, 'second.pkl')

        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)
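# A minimal, hypothetical sketch (not part of the original tests) of the
# round trip the two `test_double_pickle` variants above exercise.
# `tempfile.TemporaryDirectory` deletes the files when the block exits,
# which is why the second variant can drop the try/finally cleanup of the
# first. Assumes `y` is any 1-D training series.
def pickle_round_trip_sketch(y):
    model = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True).fit(y)
    with tempfile.TemporaryDirectory() as tdir:
        path = os.path.join(tdir, 'model.pkl')
        joblib.dump(model, path)
        loaded = joblib.load(path)
    # the directory and pickle are gone here, but the loaded model lives on
    return loaded.predict(n_periods=5)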
def test_add_new_obs_deprecated():
    endog = wineind
    train, test = endog[:125], endog[125:]

    model = ARIMA(order=(1, 0, 0))
    model.fit(train)

    with pytest.warns(DeprecationWarning):
        model.add_new_observations(test)
def test_add_new_obs_deprecated():
    endog = wineind
    train, test = endog[:125], endog[125:]

    model = ARIMA(order=(1, 0, 0))
    model.fit(train)

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        model.add_new_observations(test)

        assert len(w)
        # Might be more than one warning, so quick pass
        assert any(
            issubclass(wrn.category, DeprecationWarning)
            and 'pmdarima' in str(wrn.message)
            for wrn in w
        )
def test_for_older_version():
    # Fit an ARIMA
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)

    # There are three possibilities here:
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this.
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    for case, do_fit, expect_warning in [(1, False, False),
                                         (2, True, True),
                                         (3, True, True)]:

        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # If it's case 2, we remove the pkg_version_. If 3, we set it low
        if case == 2:
            delattr(arima, 'pkg_version_')
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < the current one

        # Pickle it
        pickle_file = 'model.pkl'
        try:
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            with warnings.catch_warnings(record=True) as w:
                arm = joblib.load(pickle_file)  # type: ARIMA

                if expect_warning:
                    assert len(w) > 0
                else:
                    assert not len(w)

                # We can still produce predictions (only if we fit)
                if do_fit:
                    arm.predict(n_periods=4)

        finally:
            os.unlink(pickle_file)
def test_for_older_version():
    # Fit an ARIMA
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)

    # There are three possibilities here:
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this.
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    for case, do_fit, expect_warning in [(1, False, False),
                                         (2, True, True),
                                         (3, True, True)]:

        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # If it's case 2, we remove the pkg_version_. If 3, we set it low
        if case == 2:
            delattr(arima, 'pkg_version_')
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < the current one

        with tempfile.TemporaryDirectory() as tdir:
            pickle_file = os.path.join(tdir, 'model.pkl')
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            if expect_warning:
                with pytest.warns(UserWarning):
                    arm = joblib.load(pickle_file)  # type: ARIMA
            else:
                arm = joblib.load(pickle_file)  # type: ARIMA

            # We can still produce predictions (only if we fit)
            if do_fit:
                arm.predict(n_periods=4)
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    # Now save it twice
    file_a = 'first.pkl'
    file_b = 'second.pkl'

    try:
        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)

        # Remove the caches from each
        loaded_a._clear_cached_state()
        loaded_b._clear_cached_state()

        # Test the previous condition where we removed the saved state of an
        # ARIMA from statsmodels and caused an OSError and a corrupted pickle
        with pytest.raises(OSError) as o:
            joblib.load(file_a)  # fails since no cached state there!
        msg = str(o)
        assert 'Could not read saved model state' in msg, msg

    # Always remove in case we fail in try, leaving residual files
    finally:
        os.unlink(file_a)
        os.unlink(file_b)
class ARIMAModel(BaseModel):

    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        Build the model and initialize.

        :param config: hyperparameters for the model
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        if not self.seasonal:
            seasonal_order = (0, 0, 0, 0)
        else:
            seasonal_order = (P, D, Q, m)
        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.

        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :return: the evaluation metric value
        """
        if not self.model_init:
            # Estimate the differencing term (d) and the seasonal
            # differencing term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)
            D = 0 if not self.seasonal else nsdiffs(data, m=7, max_D=12)
            config.update(d=d, D=D)
            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None, target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict horizon time-points ahead of the input x in fit_eval.

        :param x: ARIMA predicts the horizon steps forward from the
            training data, so x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating the model without rolling "
                "prediction currently")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling "
                "predict")

        if not update and not rolling:
            forecasts = self.model.predict(n_periods=horizon)
        elif rolling:
            if not update:
                self.save("tmp.pkl")

            forecasts = []
            for step in range(horizon):
                fc = self.model.predict(n_periods=1).item()
                forecasts.append(fc)

                # Update the existing model with a small number of MLE
                # steps for rolling prediction
                self.model.update(fc)

            if not update:
                self.restore("tmp.pkl")
                os.remove("tmp.pkl")

        return forecasts

    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate on the prediction results and y. We predict horizon
        time-points ahead of the input x in fit_eval before evaluation,
        where the horizon length equals the second dimension size of y.

        :param target: target for evaluation
        :param x: ARIMA predicts the horizon steps forward from the
            training data, so x should be None as it is not used.
        :param metrics: a list of metrics in string format
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling "
                "evaluate")

        forecasts = self.predict(horizon=len(target), rolling=rolling)
        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling "
                "save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
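# A hedged usage sketch for the ARIMAModel wrapper above, not part of the
# original source. `train_data` and `val_data` are hypothetical 1-D numpy
# arrays; the config keys mirror those read in `_build`.
model = ARIMAModel()
score = model.fit_eval(train_data, val_data, p=2, q=2, metric='mse')
one_shot = model.predict(horizon=24)              # single multi-step forecast
rolled = model.predict(horizon=24, rolling=True)  # step-by-step; model restored after
model.save("arima_checkpoint.pkl")                # pickles the fitted pmdarima model
model.restore("arima_checkpoint.pkl")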
# KPSS test
KPSSResults = namedtuple("KPSSResults",
                         ["kpss_stat", "p_value", "lags", "critical_values"])
kpss_results = KPSSResults(*tsa.kpss(data, nlags='auto'))
print("KPSS results:\n", kpss_results)

auto_fit = False
if auto_fit:
    arima = auto_arima(train, stepwise=True, trace=1, seasonal=False)
    print(arima.summary())
else:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        arima = ARIMA(order=(4, 1, 4), seasonal_order=None)
        arima.fit(train)

# Diagnostics plot
arima.plot_diagnostics(lags=50)
plt.gcf().suptitle('Diagnostics Plot', fontsize=14)

# !! not necessary !! Everything already plotted
# Plot residuals and fitted values
# plt.figure()
# fitted_values = arima.predict_in_sample()
# plt.plot(df.index[:train_len - 1], fitted_values,
#          color='C0', label="Fitted values")
# plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data")
# plt.plot(df.index[:train_len - 1], arima.resid(),
#          color='C2', label="Residuals")
# plt.gca().grid(which='both', axis='x', linestyle='--')
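# A small hedged sketch (not in the original script) of how the KPSS result
# above typically feeds a differencing decision: KPSS tests the null
# hypothesis of stationarity, so a p-value below alpha argues for
# differencing, and pmdarima's `ndiffs` can suggest the order d.
# alpha=0.05 and the `ndiffs` import are assumptions.
if kpss_results.p_value < 0.05:
    suggested_d = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
    print("KPSS rejects stationarity; suggested d =", suggested_d)
else:
    print("KPSS does not reject stationarity; d=0 may suffice")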
def predict_arima(df):
    time_in = current_milli_time()

    try:
        """ Calculate overestimation & underestimation errors against the
        previously saved forecast (the saved frame has a single
        'prediction' column) """
        with open("forecast.pickle", "rb") as forecast_in:
            future_forecast = pickle.load(forecast_in)

        if len(df) < len(future_forecast):
            error = df["memory_used"] - future_forecast["prediction"][:len(df)]
        elif len(df) > len(future_forecast):
            error = df["memory_used"][:len(future_forecast)] - future_forecast["prediction"]
        else:
            error = df["memory_used"] - future_forecast["prediction"]

        overestimation = [x for x in error if x < 0]
        overestimation = sum(overestimation) / len(overestimation)
        underestimation = [x for x in error if x >= 0]
        underestimation = sum(underestimation) / len(underestimation)

        print("UNDERESTIMATION ERROR: " + str(underestimation))
        print("OVERESTIMATION ERROR: " + str(overestimation))
        print("Mean Absolute Error in last iteration: " + str(error.abs().mean()))
    except Exception:
        print("Errors to be computed on the next iteration")  # Do nothing

    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # Maintain the observed-data pickle
    try:
        with open("arima.pickle", "rb") as pickle_in:
            arima_data = pickle.load(pickle_in)
        arima_data.append(df)
        # df = arima_data
    except Exception:
        with open("arima.pickle", "wb") as arima_data_out:
            pickle.dump([], arima_data_out)

    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    ''' Stationarity / seasonality tests '''
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        p_val, should_diff = adf_test.is_stationary(df["memory_used"])
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    ''' ARIMA MODEL: find p, q dynamically '''
    acf_lags = acf(df["memory_used"])
    acf_lags_threshold = [x for x in acf_lags if x >= getThreshold()]
    p = len(acf_lags_threshold) if len(acf_lags_threshold) <= 4 else 4

    pacf_lags = pacf(df["memory_used"])
    pacf_lags_threshold = [x for x in pacf_lags if x >= getThreshold()]
    q = len(pacf_lags_threshold) if len(pacf_lags_threshold) <= 1 else 1

    d = nd

    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If the data is seasonal, set the values of P, D, Q in seasonal_order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    x = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        stepwise_model.fit(df)

        """
        Vary the periods as per the forecasting window:
            n_periods = 30 -> 5 mins
            n_periods = 60 -> 10 mins
            n_periods = 90 -> 15 mins
        """
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast, index=test.index,
                                       columns=["prediction"])
        res = pd.concat([df, future_forecast], axis=1)

        ''' Save forecast in a pickle '''
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        data = [trace1, trace2]
        layout = go.Layout(title=x)
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")

        print("Current values")
        print(df)
        print("Predicted data points")
        print(future_forecast)

        time_out = current_milli_time()
        print("TIME for ARIMA (ms): " + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for ARIMA (ms): " + str(time_out - time_in))
        print(e)
        return None
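# A hedged refactoring sketch (not part of the original) of the error
# bookkeeping in predict_arima above: align actuals and forecasts to a
# common length, then average the signed errors on each side of zero.
# np.nan stands in where a side is empty, which the original would have
# divided by zero on.
def split_forecast_errors(actual, forecast):
    n = min(len(actual), len(forecast))
    errors = np.asarray(actual[:n]) - np.asarray(forecast[:n])
    over = errors[errors < 0]     # forecast overshot the actuals
    under = errors[errors >= 0]   # forecast at or below the actuals
    over_mean = over.mean() if len(over) else np.nan
    under_mean = under.mean() if len(under) else np.nan
    return over_mean, under_mean, np.abs(errors).mean()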
def run():
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)
    while now < timeFinish:
        try:
            now = dt.datetime.now()

            client = Client(environment=PRACTICE,
                            account_id="",
                            access_token=ACCESS_TOKEN)

            json_data = client.get_instrument_history(instrument=symbol,
                                                      granularity=timeframe,
                                                      candle_format="midpoint",
                                                      count=1440)
            json_data = json_data['candles']

            df = pd.DataFrame(json_data)
            data = df.copy()
            data = data.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data
            lnprice = np.log(data)

            # Create and fit the model
            model_temp = auto_arima(lnprice.values, start_p=1, start_q=1,
                                    max_p=1, max_q=1, m=4, start_P=0,
                                    seasonal=False, d=1, D=1, trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True, stepwise=True)
            model = ARIMA(order=model_temp.order)
            fit = model.fit(lnprice.values)

            # Predict
            future_forecast = fit.predict(n_periods=n_periods_ahead)
            future_forecast = np.exp(future_forecast)

            # Calculations
            lowest = min(future_forecast[0], future_forecast[-1])
            highest = max(future_forecast[0], future_forecast[-1])
            current = data[CLOSE].iloc[-1]
            x = ((future_forecast[0] - future_forecast[-1]) / future_forecast[0]) * 100
            slope = (future_forecast[0] - future_forecast[-1]) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            if x > 0:
                trending = "Positively / Call"
            else:
                trending = "Negatively / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")
        except Exception as e:
            print(e)

        time.sleep(SLEEP)

    return 0
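# A brief hedged note (not in the original) on the log/exp round trip used
# in run() above: fitting on np.log(prices) and exponentiating the forecast
# keeps predictions strictly positive and models percentage moves
# additively. `prices` is a hypothetical positive price series.
lnprice = np.log(prices)
fc_log = ARIMA(order=(1, 1, 1)).fit(lnprice).predict(n_periods=5)
fc = np.exp(fc_log)  # back on the original price scale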
whole_df = df.copy()
df = df.iloc[10:, :]

models_dir = "fitted_models"
if not os.path.isdir(models_dir):
    os.mkdir(models_dir)

fname = "{}_model.pkl".format(dataset_name[:-4])
fpath = os.path.join(models_dir, fname)
if not os.path.isfile(fpath) or args.overwrite:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        arima = ARIMA(
            order=saved_parameters["order"],
            seasonal_order=saved_parameters["seasonal_order"]
        )
        arima.fit(df.value)
    print("Saving fitted model on disk")
    joblib.dump(arima, fpath, compress=3)
else:
    print("Reading model from disk")
    arima = joblib.load(fpath)

gt_pred, gt_windows = get_gt_arrays(
    df.index, df.index, labels, labels_windows
)

# Compute metrics
metrics_columns = ["precision", "recall", "f_score", "nab_score"]
Metrics = collections.namedtuple("Metrics", metrics_columns)
window_size = 30
def ragged_fill_series(
    series,
    function=np.nanmean,
    backup_fill_method=np.nanmean,
    est_series=None,
    fitted_arma=None,
    arma_full_series=None,
):
    """Fill in the ragged ends of a series, adhering to the periodicity of
    the series. If there is only one observation and periodicity cannot be
    determined, the series will be returned unchanged.

    parameters:
        :series: list/pandas Series: the series whose ragged edges to fill.
            Missings should be np.nans
        :function: the function to fill nas with (e.g. np.nanmean, etc.).
            Use "ARMA" for ARMA filling
        :backup_fill_method: function: which function to fill ragged edges
            with in case the ARMA can't be estimated
        :est_series: list/pandas Series: optional, the series to calculate
            the fillna and/or ARMA function on. Should not have nas filled
            in yet by any method. E.g. a train set. If None, will be
            calculated based on itself.
        :fitted_arma: optional, fitted ARMA model if available, to avoid
            re-estimating every time in the `gen_ragged_X` function
        :arma_full_series: optional, the for_full_arma_dataset output of
            the `gen_dataset` function. Fits the ARMA model on the full
            series history rather than just the series provided

    output:
        :return: tuple of (pandas Series with filled ragged edges, the
            fitted ARMA model or None)
    """
    result = pd.Series(series).copy()
    if est_series is None:
        est_series = result.copy()

    # Periodicity of the series, to see which indices to fill in
    nonna_bools = ~pd.isna(series)
    nonna_indices = list(nonna_bools.index[nonna_bools])  # existing indices with values

    # If there is only one non-na observation, we can't determine the
    # periodicity or position in the full series, so don't fill anything
    if len(nonna_indices) > 1:
        # How often data comes (quarterly, monthly, etc.)
        periodicity = int(
            (pd.Series(result[~pd.isna(result)].index) -
             (pd.Series(result[~pd.isna(result)].index)).shift()).mode()[0])
        last_nonna = result.index[result.notna()][-1]

        # Indices to be filled in, including only the correct periodicity
        fill_indices = nonna_indices + [
            int(nonna_indices[-1] + periodicity * i)
            for i in range(1, (len(series) - last_nonna))
        ]
        # Cut down on the indices if we went too long
        fill_indices = [x for x in fill_indices if x in series.index]

        if function == "ARMA":
            # Estimate the model if not given
            if fitted_arma is None:
                fitted_arma = estimate_arma(est_series)

            # Instantiate a model with the previously estimated parameters
            # (i.e. on the train set)
            arma = ARIMA(order=fitted_arma.order)
            arma.set_params(**fitted_arma.get_params())

            # Refit the model on the full series up to this point
            if arma_full_series is not None:
                y = list(arma_full_series[~pd.isna(arma_full_series)])
                present = list(result[~pd.isna(result)])

                # Limit the series to the point where actuals are present
                end_index = 0
                for i in range(len(present), len(y) + 1):
                    if list(y[(i - len(present)):i]) == list(present):
                        end_index = i
                y = y[:end_index]
            # Refit the model on just this series
            else:
                y = list(result[~pd.isna(result)])  # refit on the data present
                present = y.copy()

            # Can fail if there are not enough datapoints for the order of
            # the ARMA process
            try:
                arma.fit(y, error_action="ignore")
                preds = arma.predict(n_periods=int(len(series) - last_nonna))
                fills = list(present) + list(preds)
                fills = fills[:len(fill_indices)]
            except Exception:
                fills = list(result[~pd.isna(result)]) + [
                    backup_fill_method(est_series)
                ] * (len(series) - last_nonna)
                fills = fills[:len(fill_indices)]
            result[fill_indices] = fills
        else:
            fills = list(result[~pd.isna(result)]) + [function(est_series)] * (
                len(series) - last_nonna)
            fills = fills[:len(fill_indices)]
            result[fill_indices] = fills

    return result, fitted_arma
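# A hedged usage sketch for ragged_fill_series above (not in the original).
# A series with a ragged (missing) end is filled first with the series
# mean, then with an ARMA forecast; `estimate_arma` is assumed to be the
# helper referenced inside the function body.
raw = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan, np.nan, np.nan])
mean_filled, _ = ragged_fill_series(raw, function=np.nanmean)
arma_filled, arma_model = ragged_fill_series(raw, function="ARMA")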