Example #1
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    # Now save it twice
    file_a = 'first.pkl'
    file_b = 'second.pkl'

    try:
        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)

    # Always remove in case we fail in try, leaving residual files
    finally:
        os.unlink(file_a)
        os.unlink(file_b)
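
(These test snippets assume pmdarima's test-suite context. A hypothetical, minimal setup they would run under is sketched below; `y` is just a stand-in for any 1-D series.)

# Hypothetical setup assumed by the test snippets in this section; the real
# suite defines its own fixtures, so treat these names as stand-ins.
import os
import time
import warnings
import tempfile

import joblib
import numpy as np
import pytest

from pmdarima.arima import ARIMA
from pmdarima.datasets import load_wineind

wineind = load_wineind()
y = np.random.RandomState(42).rand(50)  # any small 1-D series works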
Example #2
def test_issue_286():
    mod = ARIMA(order=(1, 1, 2))
    mod.fit(wineind)

    with pytest.raises(ValueError) as ve:
        mod.predict_in_sample(start=0)
    assert "In-sample predictions undefined for" in pytest_error_str(ve)
Example #3
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    with tempfile.TemporaryDirectory() as tdir:

        # Now save it twice
        file_a = os.path.join(tdir, 'first.pkl')
        file_b = os.path.join(tdir, 'second.pkl')

        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)
Example #4
def test_add_new_obs_deprecated():
    endog = wineind
    train, test = endog[:125], endog[125:]
    model = ARIMA(order=(1, 0, 0))

    model.fit(train)

    with pytest.warns(DeprecationWarning):
        model.add_new_observations(test)
Example #5
def test_add_new_obs_deprecated():
    endog = wineind
    train, test = endog[:125], endog[125:]
    model = ARIMA(order=(1, 0, 0))

    model.fit(train)

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        model.add_new_observations(test)
        assert len(w)
        # Might be more than one warning, so quick pass
        assert any(issubclass(wrn.category, DeprecationWarning) and
                   'pmdarima' in str(wrn.message) for wrn in w)
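
Example #5 swaps the `pytest.warns` check of Example #4 for `warnings.catch_warnings(record=True)` so the warning message can be inspected too. A self-contained sketch of that pattern (the warning text here is made up):

import warnings

def _deprecated_call():
    # stand-in for model.add_new_observations
    warnings.warn("pmdarima: made-up deprecation message", DeprecationWarning)

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")  # record every warning, even repeats
    _deprecated_call()
    assert any(issubclass(wrn.category, DeprecationWarning) and
               'pmdarima' in str(wrn.message) for wrn in w)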
Example #6
def test_for_older_version():
    # Fit an ARIMA
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)

    # There are three possibilities here:
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this.
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    for case, do_fit, expect_warning in [(1, False, False),
                                         (2, True, True),
                                         (3, True, True)]:

        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # If it's case 2, we remove the pkg_version_. If 3, we set it low
        if case == 2:
            delattr(arima, 'pkg_version_')
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < the current version

        # Pickle it
        pickle_file = 'model.pkl'
        try:
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            with warnings.catch_warnings(record=True) as w:
                arm = joblib.load(pickle_file)  # type: ARIMA

                if expect_warning:
                    assert len(w) > 0
                else:
                    assert not len(w)

                # we can still produce predictions (only if we fit)
                if do_fit:
                    arm.predict(n_periods=4)

        finally:
            os.unlink(pickle_file)
Example #7
def test_for_older_version():
    # Fit an ARIMA
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)

    # There are three possibilities here:
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this.
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    for case, do_fit, expect_warning in [(1, False, False),
                                         (2, True, True),
                                         (3, True, True)]:

        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # If it's case 2, we remove the pkg_version_. If 3, we set it low
        if case == 2:
            delattr(arima, 'pkg_version_')
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < the current version

        with tempfile.TemporaryDirectory() as tdir:

            pickle_file = os.path.join(tdir, 'model.pkl')
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            if expect_warning:
                with pytest.warns(UserWarning):
                    arm = joblib.load(pickle_file)  # type: ARIMA
            else:
                arm = joblib.load(pickle_file)  # type: ARIMA

            # we can still produce predictions (only if we fit)
            if do_fit:
                arm.predict(n_periods=4)
Example #8
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    # Now save it twice
    file_a = 'first.pkl'
    file_b = 'second.pkl'

    try:
        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)

        # Remove the caches from each
        loaded_a._clear_cached_state()
        loaded_b._clear_cached_state()

        # Test the previous condition where we removed the saved state of an
        # ARIMA from statsmodels and caused an OSError and a corrupted pickle
        with pytest.raises(OSError) as o:
            joblib.load(file_a)  # fails since no cached state there!
        msg = str(o.value)
        assert 'Could not read saved model state' in msg, msg

    # Always remove in case we fail in try, leaving residual files
    finally:
        os.unlink(file_a)
        os.unlink(file_b)
Example #9
class ARIMAModel(BaseModel):
    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        Build the model and initialize it.
        :param config: hyperparameters for the model
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        if not self.seasonal:
            seasonal_order = (0, 0, 0, 0)
        else:
            seasonal_order = (P, D, Q, m)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.
        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :return: the evaluation metric value
        """

        if not self.model_init:
            # Estimating differencing term (d) and seasonal differencing term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)
            D = 0 if not self.seasonal else nsdiffs(data, m=7, max_D=12)
            config.update(d=d, D=D)

            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None,
                                   target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict `horizon` time points ahead of the training data from fit_eval.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating model without rolling prediction currently"
            )
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if not update and not rolling:
            forecasts = self.model.predict(n_periods=horizon)
        elif rolling:
            if not update:
                self.save("tmp.pkl")

            forecasts = []
            for step in range(horizon):
                fc = self.model.predict(n_periods=1).item()
                forecasts.append(fc)

                # Updates the existing model with a small number of MLE steps for rolling prediction
                self.model.update(fc)

            if not update:
                self.restore("tmp.pkl")
                os.remove("tmp.pkl")

        return forecasts

    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate the prediction results against y. Before evaluating, we predict
        horizon time points ahead of the input x from fit_eval, where the horizon
        length equals the second dimension size of y.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param metrics: a list of metrics in string format
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)

        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
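
ARIMAModel above comes from a larger project and its header is not shown. A hypothetical, minimal version of that header (BaseModel and Evaluator are stand-ins for the project's own classes):

# Hypothetical header for the ARIMAModel class; BaseModel and Evaluator are
# project-specific, so minimal stand-ins are sketched here.
import os
import pickle

import numpy as np
from pmdarima.arima import ARIMA, ndiffs, nsdiffs

class BaseModel:
    # stand-in for the project's base class
    pass

class Evaluator:
    # stand-in supporting only 'mse', matching how fit_eval uses it
    @staticmethod
    def evaluate(metric, y_true, y_pred):
        assert metric == 'mse'
        return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)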
Example #10
# KPSS test
KPSSResults = namedtuple("KPSSResults",
                         ["kpss_stat", "p_value", "lags", "critical_values"])
kpss_results = KPSSResults(*tsa.kpss(data, nlags='auto'))
print("KPSS results:\n", kpss_results)

auto_fit = False
if auto_fit:
    arima = auto_arima(train, stepwise=True, trace=1, seasonal=False)
    print(arima.summary())
else:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        arima = ARIMA(order=(4, 1, 4), seasonal_order=None)
        arima.fit(train)

# Diagnostics plot
arima.plot_diagnostics(lags=50)
plt.gcf().suptitle('Diagnostics Plot', fontsize=14)

# !! not necessary !! Everything already plotted
# Plot Residuals and fitted values
# plt.figure()
# fitted_values = arima.predict_in_sample()
# plt.plot(df.index[:train_len - 1], fitted_values,
#          color='C0', label="Fitted values")
# plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data")
# plt.plot(df.index[:train_len - 1], arima.resid(),
#          color='C2', label="Residuals")
# plt.gca().grid(which='both', axis='x', linestyle='--')
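
A natural follow-on to the fit above is an out-of-sample forecast with confidence bands; a minimal sketch on synthetic data (the order is a placeholder):

# Sketch: forecasting with confidence intervals from a fitted pmdarima ARIMA
import numpy as np
from pmdarima.arima import ARIMA

train_demo = np.random.RandomState(0).randn(200).cumsum()
arima_demo = ARIMA(order=(4, 1, 4), suppress_warnings=True).fit(train_demo)
preds, conf_int = arima_demo.predict(n_periods=20, return_conf_int=True,
                                     alpha=0.05)  # 95% intervals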
Example #11
def predict_arima(df):

    time_in = current_milli_time()
    try:
        with open("forecast.pickle", "rb") as forecast_in:
            future_forecast = pickle.load(forecast_in)
        # Calculate errors
        error = []
        # The saved forecast frame's column is "prediction" (see below)
        if len(df) < len(future_forecast):
            error = df["memory_used"] - future_forecast[:len(df)]["prediction"]
        elif len(df) > len(future_forecast):
            error = df[0:len(future_forecast)]["memory_used"] - future_forecast["prediction"]
        else:
            error = df["memory_used"] - future_forecast["prediction"]
        # Overestimation & underestimation errors
        overestimation = [x for x in error if x < 0]
        overestimation = sum(overestimation) / len(overestimation) if overestimation else 0.0
        underestimation = [x for x in error if x >= 0]
        underestimation = sum(underestimation) / len(underestimation) if underestimation else 0.0
        print("UNDERESTIMATION ERROR: " + str(underestimation))
        print("OVERESTIMATION ERROR: " + str(overestimation))
        print("Mean Absolute Error in last iteration: " + str(error.abs().mean()))
    except Exception:
        # No previous forecast saved yet, so there is nothing to score
        print("No previous forecast to evaluate yet")
  
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")


    try:
        with open("arima.pickle", "rb") as pickle_in:
            arima_data = pickle.load(pickle_in)
        arima_data.append(df)
        # df = arima_data
    except Exception:
        pass  # no history pickle yet; it is (re)written below either way
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)
    
    # Stationarity / seasonality tests
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        p_val, should_diff = adf_test.is_stationary(df["memory_used"])

        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)

    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)
    

    # ARIMA model: find p, q dynamically.
    # The AR order p is read off the PACF and the MA order q off the ACF.
    pacf_lags = pacf(df["memory_used"])
    pacf_lags_threshold = [x for x in pacf_lags if x >= getThreshold()]
    p = len(pacf_lags_threshold) if len(pacf_lags_threshold) <= 4 else 4

    acf_lags = acf(df["memory_used"])
    acf_lags_threshold = [x for x in acf_lags if x >= getThreshold()]
    q = len(acf_lags_threshold) if len(acf_lags_threshold) <= 1 else 1
    d = nd

    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If the data is seasonal, set the values of P, D, Q in seasonal_order
    stepwise_model = ARIMA(
        order=(p,d,q),
        seasonal_order=(0,nsd,0,12),
        suppress_warnings=True,
        scoring='mse'
    )
    x = str(p) + " " + str(d) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:

        stepwise_model.fit(df)
        """ 
          Vary the periods as per the forecasting window 
          n_periods= 30 = 5mins
          n_periods= 60 = 10mins
          n_periods= 90 = 15mins
        """
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast,index=test.index,columns=["prediction"])

        res=pd.concat([df,future_forecast],axis=1)

        # Save forecast in pickle
        forecast_out = open("forecast.pickle","wb")
        pickle.dump(future_forecast,forecast_out)
        forecast_out.close()
        
        trace1 = go.Scatter(x=res.index, y=res["prediction"],name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],name="DF data", mode='lines')
        data=[trace1,trace2]
        layout = go.Layout(
            title=x
        )
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for ARIMA(ms): " + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for ARIMA(ms): " + str(time_out - time_in))
        print(e)
        return None
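
An alternative to the manual ACF/PACF thresholding in this example is to let pmdarima's stepwise search pick the orders; a sketch on synthetic data:

# Sketch: stepwise order selection instead of manual ACF/PACF thresholds
import numpy as np
import pmdarima as pm

series = np.random.RandomState(1).randn(120).cumsum()
model = pm.auto_arima(series, seasonal=True, m=12, stepwise=True,
                      suppress_warnings=True, error_action='ignore')
print(model.order, model.seasonal_order)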
Example #12
def run():
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)

    while (now < timeFinish):
        try:
            now = dt.datetime.now()

            client = Client(environment=PRACTICE,
                            account_id="",
                            access_token=ACCESS_TOKEN)

            json_data = client.get_instrument_history(instrument=symbol,
                                                      granularity=timeframe,
                                                      candle_format="midpoint",
                                                      count=1440)
            json_data = json_data['candles']
            df = pd.DataFrame(json_data)

            data = df.copy()
            data = data.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data
            lnprice = np.log(data)

            # Create and fit the model
            model_temp = auto_arima(lnprice.values,
                                    start_p=1,
                                    start_q=1,
                                    max_p=1,
                                    max_q=1,
                                    m=4,
                                    start_P=0,
                                    seasonal=False,
                                    d=1,
                                    D=1,
                                    trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True,
                                    stepwise=True)

            model = ARIMA(order=model_temp.order)
            fit = model.fit(lnprice.values)

            # Predict
            future_forecast = fit.predict(n_periods=n_periods_ahead)
            future_forecast = np.exp(future_forecast)

            # Calculations
            lowest = min(future_forecast[0], future_forecast[-1])
            highest = max(future_forecast[0], future_forecast[-1])
            current = data[CLOSE].iloc[-1]
            x = ((future_forecast[0] - future_forecast[-1]) /
                 future_forecast[0]) * 100
            slope = (future_forecast[0] -
                     future_forecast[-1]) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            if x > 0:
                trending = "Positively / Call"
            else:
                trending = "Negatively / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")
        except Exception as e:
            print(e)

        time.sleep(SLEEP)

    return 0
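
Since `auto_arima` already returns a fitted model, the separate `ARIMA(order=model_temp.order).fit(...)` step above is optional; a shorter equivalent (sketch, synthetic data):

# Sketch: predicting directly from the model auto_arima returns
import numpy as np
from pmdarima import auto_arima

lnprice_demo = np.log(np.abs(np.random.RandomState(3).randn(300)).cumsum() + 50.0)
model = auto_arima(lnprice_demo, start_p=1, start_q=1, max_p=1, max_q=1,
                   d=1, seasonal=False, error_action='ignore',
                   suppress_warnings=True, stepwise=True)
future = np.exp(model.predict(n_periods=12))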
Example #13
    whole_df = df.copy()
    df = df.iloc[10:, :]

    models_dir = "fitted_models"
    if not os.path.isdir(models_dir):
        os.mkdir(models_dir)
    fname = "{}_model.pkl".format(dataset_name[:-4])
    fpath = os.path.join(models_dir, fname)
    if not os.path.isfile(fpath) or args.overwrite:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            arima = ARIMA(
                order=saved_parameters["order"],
                seasonal_order=saved_parameters["seasonal_order"]
            )
            arima.fit(df.value)
            print("Saving fitted model on disk")
            joblib.dump(arima, fpath, compress=3)
    else:
        print("Reading model from disk")
        arima = joblib.load(fpath)

    gt_pred, gt_windows = get_gt_arrays(
        df.index, df.index, labels, labels_windows
    )

    # Compute metrics
    metrics_columns = ["precision", "recall", "f_score", "nab_score"]
    Metrics = collections.namedtuple("Metrics", metrics_columns)

    window_size = 30
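
The core fit-or-load caching pattern from this fragment, in isolation (paths and order are placeholders):

# Sketch of the fit-or-load caching pattern, assuming only pmdarima and joblib
import os

import joblib
import numpy as np
from pmdarima.arima import ARIMA

models_dir = "fitted_models"
os.makedirs(models_dir, exist_ok=True)
fpath = os.path.join(models_dir, "demo_model.pkl")

if not os.path.isfile(fpath):
    arima = ARIMA(order=(1, 1, 0), suppress_warnings=True)
    arima.fit(np.random.RandomState(0).randn(100).cumsum())
    joblib.dump(arima, fpath, compress=3)
else:
    arima = joblib.load(fpath)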
Example #14
def ragged_fill_series(
    series,
    function=np.nanmean,
    backup_fill_method=np.nanmean,
    est_series=None,
    fitted_arma=None,
    arma_full_series=None,
):
    """Filling in the ragged ends of a series, adhering to the periodicity of the series. If there is only one observation and periodicity cannot be determined, series will be returned unchanged.

    parameters:
            :series: list/pandas Series: the series to fill the ragged edges of. Missings should be np.nans
    :function: the function to fill nas with (e.g. np.nanmean, etc.). Use "ARMA" for ARMA filling
    :backup_fill_method: function: which function to fill ragged edges with in case ARMA can't be estimated
    :est_series: list/pandas Series: optional, the series to calculate the fillna and/or ARMA function on. Should not have nas filled in yet by any method. E.g. a train set. If None, will calculated based on itself.
    :fitted_arma: optional, fitted ARMA model if available to avoid reestimating every time in the `gen_ragged_X` function
    :arma_full_series: optional, for_full_arma_dataset output of `gen_dataset` function. Fitting the ARMA model on the full series history rather than just the series provided

    output:
            :return: pandas Series with filled ragged edges
    """
    result = pd.Series(series).copy()
    if est_series is None:
        est_series = result.copy()

    # periodicity of the series, to see which to fill in
    nonna_bools = ~pd.isna(series)
    nonna_indices = list(
        nonna_bools.index[nonna_bools])  # existing indices with values
    # if there is only one non-na observation, can't determine periodicity or position in full series, don't fill anything
    if len(nonna_indices) > 1:
        periodicity = int(
            (pd.Series(result[~pd.isna(result)].index) -
             (pd.Series(result[~pd.isna(result)].index)).shift()
             ).mode()[0])  # how often data comes (quarterly, monthly, etc.)
        last_nonna = result.index[result.notna()][-1]
        fill_indices = nonna_indices + [
            int(nonna_indices[-1] + periodicity * i)
            for i in range(1, (len(series) - last_nonna))
        ]  # indices to be filled in, including only the correct periodicity
        fill_indices = [x for x in fill_indices if x in series.index
                        ]  # cut down on the indices if went too long

        if function == "ARMA":
            # estimate the model if not given
            if fitted_arma is None:
                fitted_arma = estimate_arma(est_series)
            # instantiate model with previously estimated parameters (i.e. on train set)
            arma = ARIMA(order=fitted_arma.order)
            arma.set_params(**fitted_arma.get_params())

            # refit the model on the full series to this point
            if arma_full_series is not None:
                y = list(arma_full_series[~pd.isna(arma_full_series)])
                present = list(result[~pd.isna(result)])
                # limit the series to the point where actuals are
                end_index = 0
                for i in range(len(present), len(y) + 1):
                    if list(y[(i - len(present)):i]) == list(present):
                        end_index = i
                y = y[:end_index]
            # refit model on just this series
            else:
                y = list(result[~pd.isna(result)])  # refit the model on data
                present = y.copy()
            # can fail if not enough datapoints for order of ARMA process
            try:
                arma.fit(y, error_action="ignore")
                preds = arma.predict(n_periods=int(len(series) - last_nonna))
                fills = list(present) + list(preds)
                fills = fills[:len(fill_indices)]
            except Exception:
                fills = list(result[~pd.isna(result)]) + [
                    backup_fill_method(est_series)
                ] * (len(series) - last_nonna)
                fills = fills[:len(fill_indices)]
            result[fill_indices] = fills
        else:
            fills = list(result[~pd.isna(result)]) + [function(est_series)] * (
                len(series) - last_nonna)
            fills = fills[:len(fill_indices)]
            result[fill_indices] = fills

    return result, fitted_arma
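
A small usage sketch for the mean-fill path of `ragged_fill_series` (the "ARMA" path additionally needs the `estimate_arma` helper, which is not shown here):

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, np.nan, np.nan])
filled, arma = ragged_fill_series(s, function=np.nanmean)
print(filled.tolist())  # [1.0, 2.0, 3.0, 2.0, 2.0] -- ragged end filled with the mean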