Esempio n. 1
0
def smad(ts,
         m=3.0,
         period=None,
         stl_seasonal=25,
         only_low_values=False,
         score=False):
    '''
        Seasonal-MAD outlier detection.

        The STL seasonal component and the series median are subtracted
        from ``ts``; the remainder is handed to a ``MAD`` detector.

        Input:
            ts: pd.Series with DateTimeIndex
            m: threshold forwarded to ``MAD.predict`` (described upstream
               as a standard-deviation multiple -- TODO confirm)
            period: seasonal period of the series; only passed to STL
                    when it is not None
            stl_seasonal: value of STL's ``seasonal`` parameter
            only_low_values: forwarded to the ``MAD`` constructor
                             (report anomalies only for low values)
            score: if True, return the detector's decision function
        Output:
            ``mad.decision_function(residual)`` when ``score`` is True,
            otherwise the entries of ``ts`` whose index is returned by
            ``mad.predict(residual, m=m)``.
    '''
    # Only forward ``period`` when the caller supplied one.
    stl_options = {"seasonal": stl_seasonal}
    if period is not None:
        stl_options["period"] = period
    decomposition = STL(ts, **stl_options).fit()

    # Residual = series - median - seasonal component.
    residual = ts - np.nanmedian(ts) - decomposition.seasonal

    # Outlier search with MAD.
    detector = MAD(only_low_values=only_low_values)
    detector.fit(residual)

    if score:
        return detector.decision_function(residual)
    flagged_index = detector.predict(residual, m=m).index
    return ts.loc[flagged_index]
Esempio n. 2
0
 def predict_past(self, df, freq_period, steps):
     """Scale ``df["y"]``, STL-decompose it and return the first prediction.

     A scaler is fitted on ``df["y"]`` (``PowerTransformer`` when the value
     range exceeds 100, ``IdentityTransformer`` otherwise), stored in
     ``self.scaler2`` and pickled to ``<self.directory>/scaler_pred.sav``.
     The scaled series is decomposed with STL; the components are written
     to ``df`` (columns ``trend``, ``seasonal``, ``residual``) and to
     ``self.trend`` / ``self.seasonal`` / ``self.residual``.

     Parameters
     ----------
     df : DataFrame with a ``"y"`` column; it gains three columns in place.
     freq_period : int used to derive the STL period (see note below).
     steps : forwarded to ``self.make_prediction``.

     Returns
     -------
     The first element of the prediction returned by
     ``self.make_prediction(steps)``.

     Side effects: writes the scaler file, shows the decomposition plot.
     """
     scalerfile = self.directory + '/scaler_pred.sav'
     observed = np.reshape(np.array(df["y"]), (-1, 1))

     # The scaler is refit on every call and the file on disk overwritten;
     # an existing scaler file is never loaded.
     if (df["y"].max() - df["y"].min()) > 100:
         if self.verbose == 1:
             print("PowerTransformation scaler used")
         scaler = PowerTransformer()
     else:
         if self.verbose == 1:
             print("Identity scaler used")
         scaler = IdentityTransformer()
     self.scaler2 = scaler.fit(observed)
     Y = self.scaler2.transform(observed)
     # Context manager so the file handle is closed (and flushed) promptly.
     with open(scalerfile, 'wb') as handle:
         pickle.dump(self.scaler2, handle)

     # NOTE(review): an even freq_period is bumped to odd here and then
     # incremented again in the STL call, so the period actually used is
     # always even -- confirm this is intended.
     if freq_period % 2 == 0:
         freq_period = freq_period + 1
     decomposition = STL(Y, period=freq_period + 1).fit()
     decomposition.plot()
     plt.show()

     df.loc[:, 'trend'] = decomposition.trend
     df.loc[:, 'seasonal'] = decomposition.seasonal
     df.loc[:, 'residual'] = decomposition.resid
     # Equivalent to fillna(method="bfill"), which newer pandas deprecates.
     df = df.bfill()
     self.trend = np.asarray(df.loc[:, 'trend'])
     self.seasonal = np.asarray(df.loc[:, 'seasonal'])
     self.residual = np.asarray(df.loc[:, 'residual'])
     prediction, _, _ = self.make_prediction(steps)
     return prediction[0]
Esempio n. 3
0
    def fit(self, *, inner_iter=None, outer_iter=None, fit_kwargs=None):
        """
        Estimate STL and forecasting model parameters.

        Parameters
        ----------\n%(fit_params)s
        fit_kwargs : Dict[str, Any]
            Extra keyword arguments handed to the ``fit`` method of
            ``model`` when it is estimated on the deseasonalized data
            (STL trend plus STL residual).

        Returns
        -------
        STLForecastResults
            Results with forecasting methods.

        Raises
        ------
        AttributeError
            If the fitted model result has no ``forecast`` attribute.
        """
        model_fit_options = fit_kwargs if fit_kwargs is not None else {}
        decomposer = STL(self._endog, **self._stl_kwargs)
        decomposition: DecomposeResult = decomposer.fit(
            inner_iter=inner_iter, outer_iter=outer_iter
        )
        # The forecasting model only sees the series without its seasonal
        # component.
        deseasonalized = decomposition.trend + decomposition.resid
        forecaster = self._model(deseasonalized, **self._model_kwargs)
        fitted = forecaster.fit(**model_fit_options)
        if not hasattr(fitted, "forecast"):
            raise AttributeError(
                "The model's result must expose a ``forecast`` method.")
        return STLForecastResults(
            decomposer, decomposition, forecaster, fitted, self._endog
        )
Esempio n. 4
0
  def extract_climate_trend(self, df, trend='STL'):
    '''
      Extract the STL trend component of every column of a dataframe.

      input_params:
        df: dataframe whose column trends are to be extracted
            requirements for the dataframe:
            - dataframe index needs to be datetime,
            - datetime index should be sorted
            - should be a monthly resampling
        trend: extraction method; only 'STL' is implemented. Any other
               value yields an empty dataframe.

      returns:
        dataframe with one trend column per input column (empty when
        `trend` is not 'STL').
    '''
    climate_trend_df = pd.DataFrame()

    if trend == 'STL':
      years = df.index.year
      # Seasonal smoother window: number of calendar years spanned,
      # rounded up to the next odd number.
      seasons = years[-1] - years[0]
      if seasons % 2 == 0:
        seasons += 1
      # STL rejects seasonal windows below 3; without this clamp, data
      # spanning fewer than two calendar years raised ValueError.
      seasons = max(seasons, 3)

      for col in df:
        res = STL(df[col], period=12, seasonal=seasons, robust=True).fit()
        climate_trend_df[col] = res.trend

    return climate_trend_df
Esempio n. 5
0
    def decompose_ts(self, df, label, freq='W'):
        """Decompose the resampled task counts and save the component plot.

        ``df[['TaskDate', 'TaskCount']]`` is indexed by ``TaskDate``,
        resampled at ``freq`` and summed. An additive
        ``seasonal_decompose`` is attempted first; if it raises, STL is
        used instead. The resulting figure is written to
        ``<self.path>/<self.report_img>/decompose/decompose_<label>_<freq>.png``.

        Parameters
        ----------
        df : DataFrame with ``TaskDate`` and ``TaskCount`` columns.
        label : str used in the printed message and the file name.
        freq : resampling rule; also stored on ``self.freq``.
        """
        self.freq = freq

        ts = df[['TaskDate',
                 'TaskCount']].set_index('TaskDate').resample(freq).sum()

        sns.set(rc={"figure.figsize": (10, 8)})
        print(f"{freq} decomposition of {label}")

        # Only the decomposition itself is guarded: a plotting or saving
        # failure is not something the STL fallback can repair.
        try:
            result = seasonal_decompose(ts, model='additive')
        except Exception as exc:  # not bare: let KeyboardInterrupt through
            print(f"seasonal_decompose failed ({exc}); falling back to STL")
            result = STL(ts).fit()

        fig = result.plot()
        fig.savefig(
            os.path.join(self.path, self.report_img, "decompose",
                         "decompose_" + label + "_" + self.freq + ".png"))
        plt.close(fig)
def v_seasonality(datos):
    """
    Visualize the seasonality check by plotting the STL decomposition of
    the (already differenced, hence stationary) data.

    Parameters
    ----------
    datos : pd.DataFrame : contents of the input file; must provide a
        'datetime' column and an 'actual' column. When None, the default
        file 'archivos/FedInterestRateDecision-UnitedStates.xlsx' is read.

    Returns
    -------
    None. Shows a single figure with the panels produced by the STL
    result's ``plot()``.

    """
    # Honour the caller's data; previously the argument was always
    # overwritten by a fresh read of the default file.
    if datos is None:
        datos = fn.f_leer_archivo(
            param_archivo='archivos/FedInterestRateDecision-UnitedStates.xlsx',
            sheet_name=0)
    datos = datos.set_index('datetime')
    # First difference; the leading NaN row is dropped.
    datos_dif = (datos - datos.shift()).dropna()
    # Monthly mean, forward-filling months without observations.
    serie = datos_dif['actual'].resample('M').mean().ffill()
    STL(serie).fit().plot()
    plt.show()
def decompostion_STL(series, period=None, title=''):
    """Fit a robust STL to ``series``, plot the result and show it.

    ``period`` is forwarded to STL unchanged; ``title`` is drawn on the
    figure at position (0.1, 0.95) in purple, size 15.
    """
    from statsmodels.tsa.seasonal import STL

    figure = STL(series, period=period, robust=True).fit().plot()
    figure.text(0.1, 0.95, title, size=15, color='purple')
    plt.show()
def test_short_class(default_kwargs_short):
    """STL output for the short fixture matches the stored 'short' results."""
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs_short)
    fitted = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)

    reference = results.loc['short'].sort_index()
    comparisons = (
        (fitted.seasonal, reference.season),
        (fitted.trend, reference.trend),
        (fitted.weights, reference.rw),
    )
    for actual, wanted in comparisons:
        assert_allclose(actual, wanted)
Esempio n. 9
0
def test_pickle(default_kwargs):
    """An STL model survives a pickle round trip with identical fits."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    original = STL(**stl_kwargs)
    first_fit = original.fit()

    restored = pickle.loads(pickle.dumps(original))
    second_fit = restored.fit()

    assert_allclose(first_fit.trend, second_fit.trend)
    assert_allclose(first_fit.seasonal, second_fit.seasonal)
    assert original.config == restored.config
def test_ntjump_1_class(default_kwargs):
    """STL with ``ntjump`` set to 1 matches the stored 'ntjump-1' results."""
    default_kwargs['ntjump'] = 1
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)
    fitted = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)

    reference = results.loc['ntjump-1'].sort_index()
    comparisons = (
        (fitted.seasonal, reference.season),
        (fitted.trend, reference.trend),
        (fitted.weights, reference.rw),
    )
    for actual, wanted in comparisons:
        assert_allclose(actual, wanted)
def test_pandas(default_kwargs, robust):
    """With a pandas Series as input, every result component is a Series."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs, robust)
    series = pd.Series(stl_kwargs['endog'], name='y')
    fitted = STL(endog=series, period=stl_kwargs['period']).fit()

    for component_name in ('trend', 'seasonal', 'resid', 'weights'):
        assert isinstance(getattr(fitted, component_name), pd.Series)
Esempio n. 12
0
def plot_time_trend(df, name):
    """Build a four-row figure of the STL decomposition of ``df["Close"]``.

    Only rows whose index year is 2006 or later are decomposed (robust
    STL, period 250, seasonal 21). Rows show, top to bottom: observed
    series, trend, seasonal component and residuals. ``HOSE_COLOR`` is
    used when ``name`` is "VN-INDEX", ``HNX_COLOR`` otherwise.

    Returns the figure created by ``make_subplots``.
    """
    marker_color = HOSE_COLOR if name == "VN-INDEX" else HNX_COLOR

    closes = df[df.index.year >= 2006]["Close"]
    res = STL(closes, period=250, seasonal=21, robust=True).fit()

    fig = make_subplots(shared_xaxes=True, rows=4, cols=1)

    # (component, trace name, y-axis title), one entry per subplot row.
    panels = (
        (res.observed, "Orignal Index", "Orginal"),
        (res.trend, "Trend", "Trend"),
        (res.seasonal, "Season", "Seasonal"),
        (res.resid, "Resid", "Residuals"),
    )

    for row, (component, trace_name, _) in enumerate(panels, start=1):
        trace = go.Scatter(
            x=component.index,
            y=component,
            name=trace_name,
            showlegend=False,
            marker_color=marker_color,
        )
        fig.add_trace(trace, row=row, col=1)

    # Y-axis titles, one per row.
    for row, (_, _, axis_title) in enumerate(panels, start=1):
        fig.update_yaxes(title_text=axis_title, row=row, col=1)

    fig.update_layout(title=f"Seasonal-Trend Decomposition of {name}",
                      height=500)
    return fig
def test_baseline_class(default_kwargs):
    """Default STL configuration matches the stored 'baseline' results."""
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)
    fitted = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)

    reference = results.loc['baseline'].sort_index()
    assert_allclose(fitted.trend, reference.trend)
    assert_allclose(fitted.seasonal, reference.season)
    assert_allclose(fitted.weights, reference.rw)

    # The residual must be whatever is left after trend and season.
    expected_resid = stl_kwargs['endog'] - reference.trend - reference.season
    assert_allclose(fitted.resid, expected_resid)
def test_parameter_checks_seasonal(default_kwargs):
    """Even, negative and float ``seasonal`` values raise ValueError."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = stl_kwargs['endog']
    period = stl_kwargs['period']
    match = 'seasonal must be an odd positive integer >= 3'

    for bad_seasonal in (2, -7, 13.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, seasonal=bad_seasonal)
Esempio n. 15
0
def n_sigma(data):
    """Decompose the statsmodels CO2 dataset with STL and show the plot.

    NOTE(review): the ``data`` argument is overwritten before it is read,
    so whatever the caller passes is ignored -- confirm whether that is
    intended.

    Prints the head of the raw data, the type of the monthly data and the
    lengths of the monthly data and of the residual, trend and seasonal
    components.
    """
    from statsmodels.datasets import co2

    data = co2.load(True).data
    print(data.head())

    # Monthly means, forward-filling months without observations.
    data = data.resample('M').mean().ffill()
    fitted = STL(data).fit()

    print(type(data))
    component_lengths = [
        len(part) for part in (fitted.resid, fitted.trend, fitted.seasonal)
    ]
    print(len(data), *component_lengths)

    fitted.plot()
    plt.show()
Esempio n. 16
0
def testStationarity(df, keywords):
    """Print keywords whose STL seasonal component fails the ADF threshold.

    For each keyword the series from ``readData(df, keyword)`` is
    decomposed with STL (seasonal=13) and ``adfuller`` is run on the
    seasonal component. The keyword and the second element of the
    ``adfuller`` result are printed when that element exceeds 0.05.
    """
    for keyword in keywords:
        series = readData(df, keyword)
        seasonal_part = STL(series, seasonal=13).fit().seasonal

        # Index 1 is presumably the p-value -- verify against adfuller docs.
        p_value = adfuller(seasonal_part)[1]
        if p_value > 0.05:
            print(keyword, p_value)
def test_parameter_checks_trend(default_kwargs):
    """Invalid ``trend`` values (14, 11, -19, 19.0) raise ValueError."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = stl_kwargs['endog']
    period = stl_kwargs['period']
    match = 'trend must be an odd positive integer >= 3 where trend > period'

    for bad_trend in (14, 11, -19, 19.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, trend=bad_trend)
def STL_decomposition(df, column, year):
    """Robust STL (period 29) of one column's daily means for one year.

    Rows whose ``date_c`` falls in ``year`` are sorted by date, averaged
    per day, time-interpolated and decomposed. The mean of the trend is
    printed and the fitted STL result is returned.
    """
    in_year = df[(df.date_c.dt.year == year)].sort_values(by="date_c")
    daily = in_year[["date_c", column]].resample("1D", on="date_c").mean()[[column]]
    # Fill days without observations by interpolating along the time index.
    series = daily.interpolate(method="time")[column]

    res = STL(series, period=29, robust=True).fit()

    print(f"Trend mean = {res.trend.mean()}", flush=True)
    return res
 def get_ticker_stl(self, ticker, start_date="2015-01-01", end_date=None, period=None):
     """Load a ticker's data and STL-decompose its ``Close`` column.

     ``end_date`` defaults to today's date (``%Y-%m-%d``) when falsy. The
     data comes from ``self.get_ticker_data``; its index is converted to
     datetimes and sorted. If STL fails with the requested ``period``, the
     exception is logged as a warning and period 5 is used instead.

     Returns ``(df, stl)``: the prepared data and the fitted STL result.
     """
     if not end_date:
         end_date = datetime.datetime.now().strftime("%Y-%m-%d")

     df = self.get_ticker_data(ticker, start_date, end_date)
     df.index = pd.to_datetime(df.index)
     df.sort_index(inplace=True)

     try:
         fitted = STL(df["Close"], period=period).fit()
     except Exception as err:
         # Requested period unusable: log and fall back to period 5.
         log.warning(err)
         fitted = STL(df["Close"], period=5).fit()
     return df, fitted
def predict(data, hyperparams):
    """Return the indices of anomalous rows in ``data``.

    When ``hyperparams['seasonality']`` is truthy, column 1 of ``data``
    (indexed by column 0) is STL-decomposed with
    ``hyperparams['period']`` and ``anom_detect`` is evaluated on the
    residuals. Otherwise DBSCAN (``eps``, ``min_pts``) is fitted on
    ``data`` and the positions labelled -1 are returned.
    """
    if not hyperparams['seasonality']:
        clustering = DBSCAN(eps=hyperparams['eps'],
                            min_samples=hyperparams['min_pts']).fit(data)
        return np.argwhere(clustering.labels_ == -1).flatten().tolist()

    series = pd.Series(data=list(data.iloc[:, 1]),
                       index=list(data.iloc[:, 0]))
    residuals = STL(series, period=hyperparams['period']).fit().resid.values
    residual_frame = pd.DataFrame(data={'residuals': residuals})
    flagged = anom_detect().evaluate(residual_frame, col_name='residuals')
    return list(flagged.index)
def test_parameter_checks_period(default_kwargs):
    """Two-column input and invalid ``period`` values raise ValueError."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = stl_kwargs['endog']
    period = stl_kwargs['period']

    # Two identical columns side by side: no longer one-dimensional.
    two_columns = np.hstack((endog[:, None], endog[:, None]))
    with pytest.raises(ValueError, match='y must be a 1d array'):
        STL(endog=two_columns, period=period)

    match = 'period must be a positive integer >= 2'
    for bad_period in (1, -12, 4.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=bad_period)
Esempio n. 22
0
def stl_decomposition(series, period=12):
    """
    Fit a robust STL decomposition to a pandas Series.

    Parameters
    ----------
    series : Series object
        The observations to be deseasonalised.
    period : int (optional)
        Length of the seasonal period in observations.

    Returns
    -------
    The result object returned by ``STL(...).fit()``.
    """
    return STL(series, period=period, robust=True).fit()
Esempio n. 23
0
    def run(self, ds, y, period, points):
        """Decompose a series with STL and forecast it with Prophet.

        Parameters
        ----------
        ds : sequence of dates, or a one-element sequence holding a ticker
            symbol (then dates and values are loaded through
            ``self.get_ticker_stl``).
        y : observed values matching ``ds`` (ignored in ticker mode).
        period : STL period; if STL fails with it, period 5 is used.
        points : number of future points (non-ticker mode) or number of
            calendar days from today scanned for NYSE trading days
            (ticker mode).

        Returns
        -------
        Whatever ``self.output_msg`` returns: either an error message in
        ``forecast_ds`` when the limits are exceeded, or the observed,
        trend and seasonal series together with the forecast and its
        lower/upper bounds.
        """
        # NOTE(review): the message advertises "max 3000" while the limits
        # enforced here are 300 (period) and 500 (points) -- confirm which
        # is intended before touching either.
        if (period and period > 300) or (points and points > 500):
            return self.output_msg(
                forecast_ds=["Too many forecast or period points! (max 3000)"])

        # Financial Series, first element of ds must be the Ticker
        if len(ds) == 1:
            ticker = ds[0]
            df, stl = self.get_ticker_stl(ticker, period=period)
            df = df.reset_index()
            ds = df["Date"].values
            y = df["Close"].values
            financial = True
        else:
            try:
                # Try to use the input period, force 5 in failure.
                stl = STL(y, period=period).fit()
            except Exception as e:
                # Logged (not printed) to match the rest of this service.
                log.warning(e)
                stl = STL(y, period=5).fit()
            financial = False

        log.info("Forecasting...")
        # Prophet
        df = pd.DataFrame(data={"ds": ds, "y": y})
        df["ds"] = pd.to_datetime(df["ds"])
        m = Prophet()
        m.fit(df)

        if financial:
            # Only NYSE trading days are forecast for tickers.
            nyse = mcal.get_calendar('NYSE')
            start_date = datetime.datetime.today()
            end_date = start_date + datetime.timedelta(days=points)
            valid_days = nyse.valid_days(start_date=start_date,
                                         end_date=end_date)
            future = pd.DataFrame(data={"ds": [v.date() for v in valid_days]})
            # pd.concat replaces Series.append, which pandas 2.0 removed.
            all_dates = pd.concat([df["ds"], future["ds"]], ignore_index=True)
            future = pd.DataFrame(data={"ds": all_dates})
        else:
            future = m.make_future_dataframe(periods=points)

        forecast = m.predict(future)
        forecast_df = [
            pd.to_datetime(dt).strftime('%Y-%m-%d')
            for dt in forecast["ds"].values
        ]
        return self.output_msg(observed=stl.observed,
                               trend=stl.trend,
                               seasonal=stl.seasonal,
                               forecast=forecast["yhat"].values,
                               forecast_ds=forecast_df,
                               forecast_lower=forecast["yhat_lower"].values,
                               forecast_upper=forecast["yhat_upper"].values)
Esempio n. 24
0
def example_decomp_ts(df_SDF):
    """Plot three decompositions of one application's traffic series.

    Rows of ``df_SDF`` with ``PortApp == 65805`` are put on a regular
    30-minute grid between 2019-03-16 and 2019-06-08 (missing slots filled
    with 0). The ``s_nPacketDn`` column is decomposed with a one-sided
    moving-average ``seasonal_decompose``, STL and robust STL, all with a
    weekly period. For each method a four-panel figure (observed, trend,
    seasonal, residual) of the samples from index 2000 onward is saved as
    ``<method>.png`` in the working directory.
    """
    FONT_SIZE = 14
    att = 's_nPacketDn'
    port_app = 65805
    decompfreq = int(24*60/30*7)  # 30-minute slots in seven days
    plot_start = 2000  # leading samples left out of every panel
    dates = ['2019-03-16', '2019-06-08']
    week_labels = ['2019/03/16', '2019/03/23', '2019/03/30', '2019/04/06', '2019/04/13', '2019/04/20',
                  '2019/04/27', '2019/05/04', '2019/05/11', '2019/05/18', '2019/05/25', '2019/06/01', '2019/06/08']

    df_SDF = df_SDF.sort_values('TimeSlot', axis=0)
    temp = df_SDF[df_SDF.PortApp == port_app]
    # Rebind instead of inplace=True: mutating a filtered selection in
    # place may trigger pandas' SettingWithCopyWarning.
    temp = temp.drop_duplicates(subset='TimeSlot')
    temp = temp.set_index('TimeSlot')
    temp.index = pd.DatetimeIndex(temp.index)
    temp = temp.reindex(pd.date_range(*dates, freq='30min'), fill_value=0)
    temp = temp.loc[dates[0]:dates[1]]

    list_colors = ['#edf2fb', '#e2eafc', '#d7e3fc', '#ccdbfd']
    values = temp[att].values

    # Method name -> callable producing the decomposition result.
    decomposers = {
        'MA': lambda: seasonal_decompose(values, period=decompfreq, model='additive', two_sided=False),
        'STL': lambda: STL(values, period=decompfreq).fit(),
        'STL_robust': lambda: STL(values, period=decompfreq, robust=True).fit(),
    }

    for method, run_decomposition in decomposers.items():
        result = run_decomposition()

        fig, ax = plt.subplots(4, figsize=(9, 9), dpi=400, gridspec_kw={'wspace':0, 'hspace':0})
        panels = (
            (result.observed, 'Observed'),
            (result.trend, 'Trend'),
            (result.seasonal, 'Seasonal'),
            (result.resid, 'Residual'),
        )
        for axis, (component, label), color in zip(ax, panels, list_colors):
            axis.plot(component[plot_start:], c='black', label=label)
            axis.set_facecolor(color)

        # One tick per week on the bottom panel, labelled MM/DD.
        ax[3].set_xticks(range(0, len(result.observed[plot_start:]), decompfreq))
        ax[3].set_xticklabels([wk[5:] for wk in week_labels[-7:]], fontsize=FONT_SIZE)
        ax[3].xaxis.set_tick_params(labelsize=FONT_SIZE)

        for axx in ax:
            axx.legend(fontsize=FONT_SIZE+4, loc='upper left')
            axx.yaxis.set_tick_params(labelsize=FONT_SIZE)
            axx.ticklabel_format(style='sci', axis='y', scilimits=(0,0))

        fig.tight_layout()
        fig.savefig(method + '.png')
        plt.close(fig)
Esempio n. 25
0
def test_plot(default_kwargs, close_figures):
    """``plot()`` runs for both array input and a named pandas Series."""
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)
    STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner).plot()

    # Same data wrapped in a named Series, fitted with default iterations.
    stl_kwargs["endog"] = pd.Series(stl_kwargs["endog"], name="CO2")
    STL(**stl_kwargs).fit().plot()
def test_plot(default_kwargs):
    """Smoke test: plotting works for array and pandas Series input."""
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)
    array_fit = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)
    array_fit.plot()

    named_series = pd.Series(stl_kwargs['endog'], name='CO2')
    stl_kwargs['endog'] = named_series
    series_fit = STL(**stl_kwargs).fit()
    series_fit.plot()
def test_parameter_checks_low_pass(default_kwargs):
    """Invalid ``low_pass`` values (14, 7, -19, 19.0) raise ValueError."""
    stl_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = stl_kwargs['endog']
    period = stl_kwargs['period']

    match = ('low_pass must be an odd positive integer >= 3 where'
             ' low_pass > period')
    for bad_low_pass in (14, 7, -19, 19.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, low_pass=bad_low_pass)
Esempio n. 28
0
    def deseason(self, dframe, method='stl', doplot=False):
        """ Estimate seasonal effects and remove them from each column.

        Parameters
        ----------
        dframe: pandas.DataFrame
            Pandas DataFrame with aggregations applied. Its columns are
            overwritten in place.
        method: str
            How seasonal variation is removed (case-insensitive):
            * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL` and
              subtract the fitted seasonal component
            * `x13` : Use US Census Bureau X-13ARIMA-SEATS software (see
              note 2); the column is replaced by the fitted trend
            * `None`: Return the raw aggregated data
            Any other string leaves the data unchanged.
        doplot: bool
            When true, call ``plot()`` on each column's fit result.

        Returns
        -------
        The same `dframe` object, with seasonal effects removed as best
        as possible.

        Notes
        -----
        1. It's best to supply as much data as possible to this method
        2. When using `method='x13'` the data must be aggregated either monthly
           (`agg='M'`) or quarterly (`agg='Q'`). This method also requires
           installing the X-13ARIMA-SEATS software and the `statsmodels` python
           module.
        """
        # Nothing to do without a method.
        if method is None:
            return dframe

        for col in dframe.columns:
            chosen = method.lower()

            if chosen == 'x13':
                # US Census Bureau seasonal adjustment software.
                results = x13_arima_analysis(dframe[col], trading=False)
                dframe[col] = results.trend
            elif chosen == 'stl':
                # statsmodels.tsa.seasonal.STL
                results = STL(dframe[col], robust=False, seasonal=3).fit()
                dframe[col] = dframe[col]-results.seasonal
            else:
                # Unrecognised method: column is left as it is.
                continue

            if doplot:
                results.plot()

        return dframe
Esempio n. 29
0
def twitter_score(x, period=None, seasonal=45):
    '''
        Return the MAD decision function of the deseasonalized series.

        The input needs to be a Series with a time index. ``period`` is
        only passed to STL when it is not None; ``seasonal`` is STL's
        seasonal parameter.
    '''
    # Filter out the seasonal component.
    stl_options = {"seasonal": seasonal}
    if period is not None:
        stl_options["period"] = period
    decomposition = STL(x, **stl_options).fit()

    # Residual = series - median - seasonal component.
    residual = x - np.nanmedian(x) - decomposition.seasonal

    # Look for outliers with MAD.
    detector = MAD()
    detector.fit(residual)
    return detector.decision_function(residual)
Esempio n. 30
0
def decompose():
    """STL-decompose the California hourly demand data and plot its trend.

    Reads 'data/Demand_for_California_hourly_UTC_time.csv', reverses the
    row order, marks the index as hourly and keeps 2015-01-01 through
    2019-12-31. Prints resid_var / (resid_var + trend_var) and
    resid_var / (resid_var + seasonal_var), then plots the trend and
    shows the figure.
    """
    df = pd.read_csv(
        'data/Demand_for_California_hourly_UTC_time.csv',
        header=0,
        infer_datetime_format=True,
        parse_dates=[0],
        index_col=[0],
    )
    # Reverse the rows (presumably the file is newest-first -- TODO confirm).
    df = df.reindex(index=df.index[::-1])
    df.index.freq = 'H' # Hourly data.
    df = df.loc['2015-01-1' : '2019-12-31']

    fitted = STL(df, seasonal=25, period=501).fit()
    trend, seasonal, rest = fitted.trend, fitted.seasonal, fitted.resid

    rest_var = rest.var()
    print(rest_var / (rest_var + trend.var()))
    print(rest_var / (rest_var + seasonal.var()))
    trend.plot()

    # Earlier experiments, kept here as a note only: seasonal_decompose on
    # the 'Electricity Demand in the State of California' column, additive
    # and multiplicative, with periods 24, 24 * 7 and 24 * 7 * 52.
    plt.show()