import numpy as np
from stldecompose import decompose


def seasonal_esd(ts, seasonality=None, hybrid=False, max_anomalies=10, alpha=0.05):
    """
    Compute the Seasonal Extreme Studentized Deviate of a time series.

    The steps taken are first to decompose the time series into an STL
    decomposition (trend, seasonality, residual). Then, calculate the Median
    Absolute Deviation (MAD) if hybrid (otherwise the median) and perform a
    regular ESD test on the residual, which we calculate as:

        R = ts - seasonality - MAD or median

    Note: The statsmodels library requires a period to compute the STL
    decomposition, hence the parameter seasonality. If none is given, it
    defaults to 20% of the length of the time series.

    Args:
        ts (list or np.array): The time series on which to compute the ESD.
        seasonality (int): Number of time points per season.
        hybrid (bool): See Twitter's research paper for the difference.
        max_anomalies (int): The number of times the Grubbs' Test will be
            applied to the ts.
        alpha (float): The significance level.

    Returns:
        list of int: The indices of the anomalies in the time series.
    """
    ts = np.array(ts)
    seasonal = seasonality or int(0.2 * len(ts))  # Seasonality defaults to 20% of the ts.
    decomposition = decompose(ts, period=seasonal)
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = esd(residual, max_anomalies=max_anomalies, alpha=alpha, hybrid=hybrid)
    return outliers
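# A minimal usage sketch for seasonal_esd, assuming the esd() helper it calls
# is defined later in this file. A weekly-seasonal series is synthesized and a
# single spike is injected; its index should be among the returned anomalies.
import numpy as np

np.random.seed(0)
t = np.arange(365)
series = 10 + np.sin(2 * np.pi * t / 7) + np.random.normal(0, 0.2, len(t))
series[100] += 8  # injected anomaly
print(seasonal_esd(series, seasonality=7, max_anomalies=5))  # expect index 100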
from math import exp, log

from stldecompose import decompose

# Module-level constants assumed by this helper.
MODEL_ADDITIVE = 'additive'
MODEL_MULTIPLICATIVE = 'multiplicative'


def calculate_decomposition(data, model=MODEL_ADDITIVE, frequency=2):
    """
    Calculate a time series decomposition.

    Args:
        data (list[float]): Input time series values.
        model (str): Seasonal component type (additive or multiplicative).
        frequency (int): Seasonal component frequency.

    Returns:
        dict: Calculation results (trend, seasonal, residual components).
    """
    # A multiplicative model is handled by decomposing the log-transformed
    # series additively, then exponentiating the components back.
    if model == MODEL_MULTIPLICATIVE:
        data = [log(x) for x in data]

    decomp = decompose(data, period=frequency)

    if model == MODEL_MULTIPLICATIVE:
        decomp.trend = [exp(x) for x in decomp.trend]
        decomp.seasonal = [exp(x) for x in decomp.seasonal]
        decomp.resid = [exp(x) for x in decomp.resid]

    return {
        'trend': decomp.trend,
        'seasonal': decomp.seasonal,
        'resid': decomp.resid,
    }
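# Hedged usage sketch for calculate_decomposition: a short multiplicative
# series (three repeats of a 12-point seasonal pattern with mild growth).
pattern = [112.0, 118.0, 132.0, 129.0, 121.0, 135.0, 148.0, 148.0, 136.0, 119.0, 104.0, 118.0]
series = [v * (1 + 0.02 * (i // 12)) for i, v in enumerate(pattern * 3)]
parts = calculate_decomposition(series, model=MODEL_MULTIPLICATIVE, frequency=12)
print(parts['seasonal'][:12])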
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stldecompose import decompose


def get_imf(exp_numb, channel):
    samples = 100  # other samples tried: 728, 714, 724(4)
    path = "../data/imf_bpfo/sample{}_imfs.csv".format(samples)
    df = pd.read_csv(path)
    # new_df = df.loc[:, "imf1":"imf4"]
    # new_df.plot()
    # plt.show()

    k = 6  # for k in range(1, 9):
    s = df["imf{}".format(k)].values
    decomp = decompose(s)
    lim = 10000
    seasonality = decomp.seasonal[:lim]
    t = np.linspace(0, 1, lim)
    plt.plot(t, seasonality)

    # Earlier variant working from the raw bearing data:
    # data = get_experiment_bearing_data(exp_numb, channel)
    # k = 850
    # j = 8
    # signal = data[k]
    # imfs = get_imfs(signal)
    # imf = imfs[j]
    # decomp = decompose(imf)
    # pulse = decomp.seasonal
    # plt.plot(pulse[:lim])
    plt.show()
from stldecompose import decompose


def series_decompose(series_df):
    '''
    Description:
        Runs the stldecompose decompose method on a time series from
        create_float_series, using 96 (the number of points per day in
        the database) as the period value.
    Parameters:
        series_df: a pandas DataFrame time series
    Returns:
        a statsmodels object representing the decomposed series
    '''
    decomped_series = decompose(series_df.values, period=96)
    return decomped_series
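# Sketch of calling series_decompose on two weeks of 15-minute data
# (96 points per day); the 'value' column name is an arbitrary choice.
import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=96 * 14, freq='15T')
demo = pd.DataFrame({'value': np.sin(2 * np.pi * np.arange(len(idx)) / 96)}, index=idx)
decomp = series_decompose(demo)
print(decomp.trend[:3])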
import numpy as np
import stldecompose as stl


def _type_std(dataframe, type_count, type_id, length):
    # Collect the seasonal component of every column belonging to this type.
    seasonal_vals = np.zeros((len(dataframe.columns) // type_count, len(dataframe)))
    for i in range(type_id, len(dataframe.columns), type_count):
        seasonal_vals[i // type_count] = stl.decompose(dataframe[i], period=length).seasonal.values
    # Mean (over time points) of the across-column standard deviation.
    type_std = np.mean(np.std(seasonal_vals, axis=0))
    return length, type_std
def main():
    # Load order data from the local Binance database.
    e = exchange.Exchange('../../lib/binance.db')
    start = int(datetime.datetime(2018, 4, 1).timestamp() * 1000)
    end = int(datetime.datetime(2019, 5, 1).timestamp() * 1000)
    # end = int(datetime.datetime(2018, 5, 1).timestamp() * 1000)

    print('Loading order data...')
    number_of_orders, prices = e.get_total_orders_ts(
        'BTCUSDT', 60 * 60 * 1000 * 6, start, end)  # 6-hour buckets
    print('done')

    buy_orders = np.array([b for s, b in number_of_orders])
    sell_orders = np.array([s for s, b in number_of_orders])
    # Replace empty buckets with the mean so the decomposition stays finite.
    buy_orders[buy_orders <= 0] = np.mean(buy_orders)
    sell_orders[sell_orders <= 0] = np.mean(sell_orders)

    returns = np.array(calculate_returns(prices))

    # Seasonally adjust each series: keep trend + residual, drop the season.
    returns_decomp = decompose(returns, period=24 * 7)
    sa_returns = returns_decomp.trend + returns_decomp.resid
    buy_orders_decomp = decompose(buy_orders, period=24 * 7)
    sa_buy_orders = buy_orders_decomp.trend + buy_orders_decomp.resid
    sell_orders_decomp = decompose(sell_orders, period=24 * 7)
    sa_sell_orders = sell_orders_decomp.trend + sell_orders_decomp.resid

    x, corrs = get_correlation_coef(
        buy_orders_decomp.resid - sell_orders_decomp.resid,
        returns_decomp.resid, 28, False)
    plt.bar(x, corrs)
    plt.show()
import matplotlib.pyplot as plt
from stldecompose import decompose


def plot_seasonal(df, tic, OUT_DIR):
    df_close = df[['date', 'close']].copy()
    df_close = df_close.set_index('date')
    decomp = decompose(df_close, period=365)
    fig = decomp.plot()
    fig.set_size_inches(20, 8)
    filename = OUT_DIR + tic + "_decompose.png"
    plt.savefig(filename, dpi=500)
def _type_std_seasonal(dataframe, type_count, length):  # pylint: disable=no-member
    seasonal_vals = np.zeros((len(dataframe.columns), len(dataframe)))
    for i in range(len(dataframe.columns)):
        seasonal_vals[i] = stl.decompose(dataframe[i], period=length).seasonal.values
    # Average, over the types, of the across-column spread of each type's
    # seasonal components.
    type_stds = [
        np.mean(np.std(seasonal_vals[t::type_count], axis=0))
        for t in range(type_count)
    ]
    return length, np.mean(type_stds)
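# A hedged sweep sketch for the helper above: try several candidate season
# lengths on a small frame with integer column labels (two interleaved
# "types") and keep the length whose seasonal components agree best, i.e.
# whose spread is smallest. The DatetimeIndex gives stldecompose an
# inferable frequency.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.date_range('2020-01-01', periods=240, freq='H')
frame = pd.DataFrame(
    {i: np.sin(2 * np.pi * np.arange(240) / 24) + rng.normal(0, 0.1, 240)
     for i in range(4)},
    index=idx)
candidates = [_type_std_seasonal(frame, type_count=2, length=n) for n in (12, 24, 48)]
best_length, _ = min(candidates, key=lambda pair: pair[1])
print(best_length)  # 24 is expected to win on this synthetic frame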
def stl_seasonal_decomposition(self):
    if self.has_validation_error:
        return

    # Decomposition based on STL - Package: stldecompose
    org_unit_group_stl = decompose(self.series, period=12)
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 9))
    self.series.plot(ax=ax1)
    org_unit_group_stl.trend.plot(ax=ax2)
    org_unit_group_stl.seasonal.plot(ax=ax3)
    org_unit_group_stl.resid.plot(ax=ax4)
    ax1.set_title("Vaccine Demand for {} in {}".format(self.vaccine, self.health_facility))
    ax2.set_title("Trend")
    ax3.set_title("Seasonality")
    ax4.set_title("Residuals")
    plt.tight_layout()
    plt.show()

    # Eliminating the seasonal component
    org_unit_group_adjusted = self.series - org_unit_group_stl.seasonal
    plt.figure(figsize=(12, 8))
    org_unit_group_adjusted.plot()
    plt.title("Plot of Vaccine Demand of {} in {} without Seasonal Component".format(
        self.vaccine, self.health_facility))
    plt.show()

    # Getting the seasonal component only.
    # Seasonality gives structure to the data.
    plt.figure(figsize=(12, 8))
    org_unit_group_stl.seasonal.plot()
    plt.title("Plot of Seasonal Component of Vaccine Demand of {} in {}".format(
        self.vaccine, self.health_facility))
    plt.show()

    # Creating a forecast based on STL
    stl_fcast = forecast(org_unit_group_stl, steps=12, fc_func=seasonal_naive, seasonal=True)

    # Plot of the forecast and the original data
    plt.figure(figsize=(12, 8))
    plt.plot(self.series, label='BCG Wastage Rate')
    plt.plot(stl_fcast, label=stl_fcast.columns[0])
    plt.title("Plot of Vaccine Demand of {} in {} Next Year Forecast".format(
        self.vaccine, self.health_facility))
    plt.legend()
    plt.show()
def detrend(self, label=None):
    """Returns the timeseries without its trend component.

    Args:
        label (str, optional): Label for the returned timeseries; defaults
            to this timeseries' own label.

    Returns:
        A FixedIndexTimeseries object.

    Raises:
        None
    """
    dec = decompose(self.timeseries.values, period=self.maxindex)
    detrended = (pandas.Series(dec.resid, index=self.timeseries.index)
                 + pandas.Series(dec.seasonal, index=self.timeseries.index))
    return FixedIndexTimeseries(detrended, mode=self.mode,
                                label=self.label if label is None else label)
def createDataset_uber(look_back):
    scaler = preprocessing.MinMaxScaler()
    df = pd.read_csv(DATASET_STATES_CSV, index_col=0, parse_dates=True, encoding="utf-8")
    df = normalize_df(df, DATA_SCALER())

    # Log-transform (shifted by 1 to keep values positive), then split each
    # column into trend/seasonal/residual components.
    df += 1
    df = df.apply(np.log)
    df2 = df.copy()
    for col in df:
        decomp = decompose(df[col], period=12)
        df[col] = decomp.trend + decomp.resid
        df2[col + '_t'] = decomp.trend
        df2[col + '_s'] = decomp.seasonal
        df2[col + '_r'] = decomp.resid

    res = []
    for col in df:
        x, y, offsets, x_s, y_s = create_dataset_uberSc(
            df[col], df2[col + '_t'], df2[col + '_s'], look_back)

        # Hold out the last FORECASTING_STEPS windows as the test set.
        x_test = np.copy(x[-FORECASTING_STEPS:])
        y_test = np.copy(y[-FORECASTING_STEPS:])
        offsets_test = np.copy(offsets[-FORECASTING_STEPS:])
        x_s_test = np.copy(x_s[-FORECASTING_STEPS:])
        y_s_test = np.copy(y_s[-FORECASTING_STEPS:])
        x = x[:-FORECASTING_STEPS]
        y = y[:-FORECASTING_STEPS]
        x_s = x_s[:-FORECASTING_STEPS]
        y_s = y_s[:-FORECASTING_STEPS]
        offsets = offsets[:-FORECASTING_STEPS]

        # The single most recent remaining window becomes the validation set.
        x_val = np.copy(x[-1:])
        y_val = np.copy(y[-1:])
        x_s_val = np.copy(x_s[-1:])
        y_s_val = np.copy(y_s[-1:])
        offsets_val = np.copy(offsets[-1:])

        x_train = np.copy(x[:-1])
        y_train = np.copy(y[:-1])
        x_s_train = np.copy(x_s[:-1])
        y_s_train = np.copy(y_s[:-1])
        offsets_train = np.copy(offsets[:-1])
        # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.33, random_state=RANDOM_SEED)

        res.append((x_train, x_val, x_test, y_train, y_val, y_test,
                    offsets_train, offsets_val, offsets_test,
                    x_s_train, x_s_val, x_s_test,
                    y_s_train, y_s_val, y_s_test))
    return res
def trend(data, period, yl):
    stl = decompose(data, period=period)
    plt.figure(facecolor='#ffffff')
    plt.gcf().clear()
    plt.plot(stl.trend, 'o-', color='b')
    plt.xticks(rotation=30)
    plt.grid(True, color='#e5e5cc', linestyle='-', linewidth=1)
    plt.xlabel(None)
    plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
    plt.title('Sales Trends', y=1.05, color='#630b0b', fontsize=21)
    plt.tight_layout()
    # img = io.BytesIO()
    # plt.savefig(img, format='png')
    # plot = base64.b64encode(img.getvalue()).decode()
    # return plot
    return plt.show()
import joblib
from statsmodels.tsa.holtwinters import ExponentialSmoothing


def create_model(Date):
    i = totalMeals
    ts = timeseries_df(i, Date)
    X = ts.values

    # STL
    modelSTL, errorSTL = stl(X, ts)
    # ETS
    modelETS, errorETS = ets(X)

    # Compare the holdout errors of ETS and STL and keep the better model.
    print(errorSTL)
    print(errorETS)
    error = min(errorSTL, errorETS)
    if error == errorSTL:
        FinalModel = modelSTL
        FModel = 'STL'
        print("STL")
    else:
        FinalModel = modelETS
        FModel = 'ETS'
        print("ETS")

    # If the STL model wins, refit it on the full series and persist it.
    if FModel == 'STL':
        from stldecompose import decompose, forecast
        globals()['STL%s' % i] = FinalModel
        STL.append(i)
        FinalModel = decompose(ts, period=7)
        joblib.dump(FinalModel, 'STL' + str(i) + '.xml', compress=1)
    # If the ETS model wins, refit the winning variant on the full series.
    elif FModel == 'ETS':
        globals()['ETS%s' % i] = FinalModel
        ETS.append(i)
        if modelETS == 1:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add',
                                              seasonal='add', damped=True).fit(use_boxcox=True)
        if modelETS == 2:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add',
                                              seasonal='mul', damped=True).fit(use_boxcox=True)
        if modelETS == 3:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul',
                                              seasonal='add', damped=True).fit(use_boxcox=True)
        if modelETS == 4:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul',
                                              seasonal='mul', damped=True).fit(use_boxcox=True)
        joblib.dump(FinalModel, 'ETS' + str(i) + '.xml', compress=1)
from glob import glob

import pandas as pd
from stldecompose import decompose


def burst_amplitude():
    amplitudes = []
    path = "../data/imf_bpfi/"
    files = glob("{}/*".format(path))
    for sample_k, file in enumerate(files):
        print("processing file {}".format(sample_k + 1))
        df = pd.read_csv(file)
        columns = list(df.columns)
        temp_list = []
        for j, column_name in enumerate(columns):
            # STL-decompose each IMF column and keep its seasonal component.
            imf = list(df[column_name])
            decomp = decompose(imf)
            seasonality = decomp.seasonal
            col = "slt_of_imf{}".format(j + 1)
            temp_list.append((col, seasonality))
        temp_dict = dict(temp_list)
        temp_df = pd.DataFrame(temp_dict)
        temp_df.to_csv("../data/stl_bpfi/sample{}_stl.csv".format(sample_k))
import matplotlib.pyplot as plt


def stl_decompose(series, period=None):
    """
    Decompose a time series using STL (Seasonal and Trend decomposition using Loess).

    series - time series data
    period - (largest) period of the seasonality
    """
    import stldecompose as stl

    result = stl.decompose(series, period=period)
    fig, axis = plt.subplots(4, 1, figsize=(15, 10))
    result.observed.plot(ax=axis[0], title='Observed')
    result.trend.plot(ax=axis[1], title='Trend')
    result.seasonal.plot(ax=axis[2], title='Seasonal')
    result.resid.plot(ax=axis[3], title='Residual')
    plt.tight_layout()
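# Example call for stl_decompose: a five-year monthly series with a yearly
# season plus a linear trend.
import numpy as np
import pandas as pd

idx = pd.date_range('2015-01-01', periods=60, freq='MS')
demo = pd.Series(np.sin(2 * np.pi * np.arange(60) / 12) + np.linspace(0, 2, 60), index=idx)
stl_decompose(demo, period=12)
plt.show()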
def stl(X, ts):
    print("Entering STL")
    from stldecompose import decompose, forecast
    from stldecompose.forecast_funcs import naive
    from sklearn.metrics import mean_squared_error

    # 90/10 train/test split.
    train_size = int(len(X) * 0.90)
    test_size = len(X) - train_size
    train, test = ts[0:train_size], ts[train_size:len(X)]

    decomp = decompose(train, period=7)
    fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)

    # Error calculation on the holdout window.
    y_pred = [row[0] for row in fcast.values]
    y_true = [row[0] for row in test.values]
    Ferror = mean_squared_error(y_true, y_pred)
    print("Leaving STL")
    return decomp, Ferror
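# Sketch of evaluating the STL + naive baseline above on a synthetic weekly
# series; the 'num_orders' column name mirrors callers elsewhere in this file
# and is otherwise an arbitrary choice.
import numpy as np
import pandas as pd

idx = pd.date_range('2018-01-06', periods=140, freq='W-SAT')
demo = pd.DataFrame({'num_orders': 50 + 10 * np.sin(2 * np.pi * np.arange(140) / 7)}, index=idx)
decomp, mse = stl(demo.values, demo)
print('holdout MSE:', mse)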
def season(data, period, yl):
    # v = int(len(data) // (len(data) // 4))
    stl = decompose(data, period=period)
    plt.gcf().clear()
    plt.plot(stl.seasonal, '-', color='b')
    # for i, j in zip(stl.seasonal.index, stl.seasonal.values):
    #     plt.annotate(str(j), xy=(i, j))
    plt.xticks(rotation=30)
    plt.grid(True)
    plt.xlabel(None)
    plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
    plt.title('Sales Seasonality', y=1.05, color='#630b0b', fontsize=21)
    plt.tight_layout()
    # img = io.BytesIO()
    # plt.savefig(img, format='png')
    # plot = base64.b64encode(img.getvalue()).decode()
    return plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from stldecompose import decompose


def decomposition_plot(series_data, period):
    """
    Decomposition of the original signal for preliminary analysis.

    Args:
        series_data: pandas Series object
        period: estimated seasonal frequency
    """
    # If the input is not a Series object, convert it to one.
    if not isinstance(series_data, pd.Series):
        series_data = pd.Series(series_data)

    # Naive additive decomposition.
    decomp = seasonal_decompose(series_data.values, model='additive', freq=period)
    decomp.plot()

    # STL decomposition.
    stl = decompose(series_data.values, period=period)
    stl.plot()

    plt.show()
    plt.pause(0.01)
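# Quick look at both decompositions for a synthetic daily series with a
# weekly season (period=7).
import numpy as np

np.random.seed(1)
demo = 20 + 5 * np.sin(2 * np.pi * np.arange(210) / 7) + np.random.normal(0, 1, 210)
decomposition_plot(demo, period=7)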
def decomposeSeries(self, ts, decompType=None):
    decomp = None
    if decompType is None:
        decompType = self.decompType.currentText()

    if decompType == "Additive":
        from statsmodels.tsa.seasonal import seasonal_decompose
        decomp = seasonal_decompose(ts, model="additive", freq=96)
    elif decompType == "Multiplicative":
        from statsmodels.tsa.seasonal import seasonal_decompose
        try:
            decomp = seasonal_decompose(ts, model="multiplicative", freq=96)
        except Exception:
            # Multiplicative decomposition fails on non-positive values.
            decomp = None
    elif decompType == "Loess (STL)":
        from stldecompose import decompose
        decomp = decompose(ts, period=96)
    return decomp
def automation_single_ts_arma_analysis(original_df, smoothed_df, smooth_type,
                                       inclusion, stationarity):
    from statsmodels.tsa.arima_model import ARIMA
    from math import ceil
    import numpy as np
    import pandas as pd
    from functions import goodness_prediction_interval, forecast_pred_int, prediction_error

    if smooth_type == 'normal':
        ts = original_df
    else:
        ts = smoothed_df

    if stationarity:
        # Split the time series dataset into training and testing.
        ts_train = ts[0:ceil(len(ts) * 0.9)]
        ts_test = ts[ceil(len(ts) * 0.9):]

        # Find the best-ordered ARMA model by HQIC.
        best_hqic = np.inf
        best_order = None
        best_mdl = None
        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values, order=(p, d, q)).fit(method='mle', trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except Exception:
                        continue
        # print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        # The .plot_predict function has a problem, so forecast directly.
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])
        # ts_predict = best_mdl.predict(start=ts.index.get_loc(pd.to_datetime(firstdate)),
        #                               end=ts.index.get_loc(pd.to_datetime(lastdate)))

        # Calculate the prediction interval.
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(len(ts_test))
    else:
        # Remove trend and seasonality from the time series.
        from stldecompose import decompose, forecast
        from stldecompose.forecast_funcs import (naive, drift, mean, seasonal_naive)

        # If the series is shorter than 130 points, treat it as weekly data.
        if len(ts) < 130:
            stl = decompose(ts, period=52)
        else:
            if not inclusion:
                stl = decompose(ts, period=251)
            else:
                stl = decompose(ts, period=365)

        # Fit an ARMA model on the residual.
        ts_train = stl.resid[0:ceil(len(stl.resid) * 0.9)]
        ts_test = stl.resid[ceil(len(stl.resid) * 0.9):]
        best_hqic = np.inf
        best_order = None
        best_mdl = None
        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values, order=(p, d, q)).fit(method='mle', trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except Exception:
                        continue
        # print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        # Prediction on the holdout window.
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])
        ts_predict = best_mdl.predict(
            start=ts.index.get_loc(pd.to_datetime(firstdate)),
            end=ts.index.get_loc(pd.to_datetime(lastdate)))

        # Add back the trend and seasonality.
        ts_predict = (stl.seasonal.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + stl.trend.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + pd.Series(index=ts_test.index, data=ts_predict))

        # Compute the prediction interval, shifted back by trend + seasonality.
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(len(ts_test))
        difference = (stl.seasonal.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + stl.trend.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()])

        def f(a):
            return a + difference

        prediction_interval = np.apply_along_axis(f, 0, prediction_interval)

    # Compute the prediction error.
    pe = prediction_error(ts_test.units, ts_forecast, original_df=original_df,
                          smooth_type=smooth_type)

    # Assess the goodness of the prediction interval.
    acc_pi, avg_diff_pi = goodness_prediction_interval(ts_test, prediction_interval)

    # Plot the prediction and prediction intervals:
    # from func_visualisation import plot_prediction
    # plot_prediction(df, prediction, prediction_interval)
    return best_order, pe, acc_pi, avg_diff_pi
def esd(ts, max_anomalies=10, alpha=0.05, hybrid=False):
    """
    Compute the Extreme Studentized Deviate of a time series.

    A Grubbs Test is performed max_anomalies times with the caveat that each
    time the top value is removed. For more details visit
    http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm

    Args:
        ts (list or np.array): The time series on which to compute the ESD.
        max_anomalies (int): The number of times the Grubbs' Test will be
            applied to the ts.
        alpha (float): The significance level.
        hybrid (bool): Use the median and MAD in place of the mean and
            standard deviation (the hybrid variant).

    Returns:
        list of int: The indices of the anomalies in the time series.
    """
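# A hedged sketch of the generalized ESD loop the docstring above describes;
# this is not the library's actual body. Critical values follow the NIST
# formulation (eda35h3); the name esd_sketch is an assumption.
import numpy as np
from scipy.stats import t as t_dist


def esd_sketch(ts, max_anomalies=10, alpha=0.05, hybrid=False):
    series = np.asarray(ts, dtype=float).copy()
    indices = list(range(len(series)))
    n = len(series)
    anomalies = []
    num_outliers = 0
    for k in range(1, max_anomalies + 1):
        if hybrid:
            center = np.median(series)
            spread = np.median(np.abs(series - center))  # MAD
        else:
            center = np.mean(series)
            spread = np.std(series, ddof=1)
        if spread == 0:
            break
        deviations = np.abs(series - center) / spread
        max_idx = int(np.argmax(deviations))
        test_stat = deviations[max_idx]
        # Critical value from the t-distribution.
        p = 1 - alpha / (2 * (n - k + 1))
        t = t_dist.ppf(p, n - k - 1)
        critical = ((n - k) * t) / np.sqrt((n - k - 1 + t ** 2) * (n - k + 1))
        anomalies.append(indices[max_idx])
        if test_stat > critical:
            num_outliers = k
        # Remove the most deviant point and repeat.
        series = np.delete(series, max_idx)
        del indices[max_idx]
    return anomalies[:num_outliers]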
plt.show()

AirP["Residual"] = AirP["Season"] - AirP["Season_ave"]
AirP["Residual"].plot()
plt.show()

seasonal_decompose(AirP["Passengers"], model="additive", freq=12).plot()
plt.show()

seasonal_decompose(np.log(AirP["Passengers"]), model="add").resid.plot()
plt.show()

from stldecompose import decompose
decompose(np.log(AirP["Passengers"]), period=12).plot();

# =============================================================================
# Quarterly beer production in Australia (in megalitres) between
# March 1956 and June 1994
# =============================================================================
AusBeer = pd.read_csv("data/AustralianBeer.csv", sep=";")
AusBeer.head()

AusBeer1 = AusBeer.copy()
AusBeer1.head()

AusBeer1["Quarter"] = pd.DatetimeIndex(AusBeer1["Quarter"])
AusBeer1.set_index("Quarter", inplace=True)
AusBeer1.head()
from stldecompose import decompose


def get_decomposition(insample_data, p):
    """Split a series into its trend, seasonal, and residual components."""
    dec = decompose(insample_data, period=p)
    return dec.trend, dec.seasonal, dec.resid
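# Example: decompose two years of noisy daily data with a yearly period.
import numpy as np

np.random.seed(0)
demo = 0.01 * np.arange(730) + np.random.normal(0, 0.5, 730)
trend, seasonal, resid = get_decomposition(demo, 365)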
def main():
    '''
    Main function that generates the result.
    '''
    # load data
    data = pd.read_csv(args.excep_train, parse_dates=["SHIFT_DATE"])

    # create train and validation sets
    train = data[(data["SHIFT_DATE"] > "2012-12-31")
                 & (data["SHIFT_DATE"] < "2018-01-01")]
    val = data[(data["SHIFT_DATE"] > "2017-12-31")
               & (data["SHIFT_DATE"] < "2019-01-01")]

    # using only a portion of the sites
    train_clean = train[(train["SITE"] == "St Paul's Hospital")
                        | (train["SITE"] == "Mt St Joseph")
                        | (train["SITE"] == "Holy Family")
                        | (train["SITE"] == "SVH Langara")
                        | (train["SITE"] == "Brock Fahrni")
                        | (train["SITE"] == "Youville Residence")]
    train_clean = train_clean[(train_clean["JOB_FAMILY"] == "DC1000")
                              | (train_clean["JOB_FAMILY"] == "DC2A00")
                              | (train_clean["JOB_FAMILY"] == "DC2B00")]
    val_clean = val[(val["SITE"] == "St Paul's Hospital")
                    | (val["SITE"] == "Mt St Joseph")
                    | (val["SITE"] == "Holy Family")
                    | (val["SITE"] == "SVH Langara")
                    | (val["SITE"] == "Brock Fahrni")
                    | (val["SITE"] == "Youville Residence")]
    val_clean = val_clean[(val_clean["JOB_FAMILY"] == "DC1000")
                          | (val_clean["JOB_FAMILY"] == "DC2A00")
                          | (val_clean["JOB_FAMILY"] == "DC2B00")]

    # create training dataframes
    splitting_train = train_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM", "SHIFT_DATE"]).size().reset_index()
    splitting_train = splitting_train.rename({"SHIFT_DATE": "ds", 0: "y"}, axis=1)

    # create validation dataframes
    splitting_val = val_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM", "SHIFT_DATE"]).size().reset_index()
    splitting_val = splitting_val.rename({"SHIFT_DATE": "ds", 0: "y"}, axis=1)

    # create timeframe data for prediction
    total_timeframe = pd.DataFrame(
        pd.date_range(start='2013-01-01', end='2017-12-31', freq="D")).rename({0: "ds"}, axis=1)
    timeframe = pd.DataFrame(
        pd.date_range(start='2018-01-01', end='2018-12-31', freq="D")).rename({0: "ds"}, axis=1)

    # unique combinations
    sites = train_clean["SITE"].unique()
    job_families = train_clean["JOB_FAMILY"].unique()
    sub_programs = train_clean["SUB_PROGRAM"].unique()

    # create and store predictions and true results
    models = {}
    split_data = {}
    pred_results_past = {}
    pred_results_future = {}
    true_results = {}
    for i in sites:
        for j in job_families:
            for k in sub_programs:
                temp_data_train = splitting_train[
                    (splitting_train["SITE"] == i)
                    & (splitting_train["JOB_FAMILY"] == j)
                    & (splitting_train["SUB_PROGRAM"] == k)].reset_index()
                temp_data_train = pd.merge(total_timeframe, temp_data_train,
                                           on="ds", how="outer")
                temp_data_train["y"] = temp_data_train["y"].fillna(0)
                temp_data_val = splitting_val[
                    (splitting_val["SITE"] == i)
                    & (splitting_val["JOB_FAMILY"] == j)
                    & (splitting_val["SUB_PROGRAM"] == k)].reset_index(drop=True)
                temp_data_val = pd.merge(timeframe, temp_data_val, on="ds", how="outer")
                temp_data_val["y"] = temp_data_val["y"].fillna(0)
                split_data[(i, j, k)] = temp_data_train
                true_results[(i, j, k)] = temp_data_val
                if temp_data_val["y"].sum() >= 300.0:
                    pred_results_past[(i, j, k)], models[(i, j, k)] = run_prophet(
                        temp_data_train, total_timeframe)
                    pred_results_future[(i, j, k)] = models[(i, j, k)].predict(timeframe)
                    print("Fitting -", i, j, k, ": Done")

    # combine predictions and true results
    combined = {}
    for i in pred_results_future:
        combined[i] = pd.merge(
            true_results[i], pred_results_future[i], on="ds",
            how="outer")[["ds", "y", "yhat", "yhat_lower", "yhat_upper"]]

    # convert to weeks and calculate errors weekly
    weekly = {}
    for i in combined:
        # create week column
        combined[i]["ds"] = combined[i]["ds"] - pd.DateOffset(weekday=0, weeks=1)
        combined[i]["week"] = combined[i]["ds"].dt.week

        # store y, yhat, yhat_lower, yhat_upper
        weekly_y = combined[i].groupby("ds").y.sum().reset_index()
        weekly_yhat = combined[i].groupby("ds").yhat.sum().astype(int).reset_index()
        weekly_yhat_lower = combined[i].groupby("ds").yhat_lower.sum().astype(int).reset_index()
        weekly_yhat_upper = combined[i].groupby("ds").yhat_upper.sum().astype(int).reset_index()

        # replace negative prediction values with 0
        weekly_yhat = weekly_yhat.where(weekly_yhat["yhat"] >= 0, 0)
        weekly_yhat_lower = weekly_yhat_lower.where(weekly_yhat_lower["yhat_lower"] >= 0, 0)
        weekly_yhat_upper = weekly_yhat_upper.where(weekly_yhat_upper["yhat_upper"] >= 0, 0)

        # merge weekly results
        weekly[i] = pd.concat([
            weekly_y, weekly_yhat["yhat"], weekly_yhat_lower["yhat_lower"],
            weekly_yhat_upper["yhat_upper"]
        ], axis=1)

        # create columns "week", "site", "job_family", "sub_program"
        length = weekly[i].shape[0]
        weekly[i]["week"] = weekly[i]["ds"].dt.weekofyear
        weekly[i]["site"] = np.repeat(i[0], length)
        weekly[i]["job_family"] = np.repeat(i[1], length)
        weekly[i]["sub_program"] = np.repeat(i[2], length)

    # model the residuals: STL-decompose the daily Prophet errors, forecast
    # them a year ahead, and fold the weekly residual forecast back into the
    # weekly predictions
    for i in weekly:
        forecasted = pred_results_past[i]
        actual = split_data[i]
        error = actual["y"] - forecasted["yhat"]
        obs = total_timeframe.copy()
        obs["error"] = error
        obs = obs.set_index("ds")
        decomp = decompose(obs, period=365)
        weekly_fcast = forecast(decomp, steps=365, fc_func=drift, seasonal=True)
        weekly_fcast["week"] = weekly_fcast.index - pd.DateOffset(weekday=0, weeks=1)
        weekly_fcast = weekly_fcast.groupby("week").sum()
        resid_fcast = weekly_fcast.reset_index()["drift+seasonal"]
        weekly_yhat = (weekly[i]["yhat"] + resid_fcast).round(0)
        weekly_yhat_lower = (weekly[i]["yhat_lower"] + resid_fcast).round(0)
        weekly_yhat_upper = (weekly[i]["yhat_upper"] + resid_fcast).round(0)
        weekly[i]["yhat"] = weekly_yhat.where(weekly_yhat >= 0, 0)
        weekly[i]["yhat_lower"] = weekly_yhat_lower.where(weekly_yhat_lower >= 0, 0)
        weekly[i]["yhat_upper"] = weekly_yhat_upper.where(weekly_yhat_upper >= 0, 0)

    # create the data/predictions folder if it doesn't exist
    predictions_path = "../data/predictions/"
    if not os.path.exists(predictions_path):
        os.mkdir(predictions_path)

    # export to the "data/predictions/" directory
    total_data = pd.DataFrame()
    for i in weekly:
        total_data = pd.concat([total_data, weekly[i]], axis=0)
    total_data.to_csv(predictions_path + "exception_predictions.csv")
plt.rc('figure', figsize=(14, 6))
plt.rc('font', size=13)

result = seasonal_decompose(data['Adj. Close'], freq=252, model='additive')
result.plot()
plt.show()

from fbprophet import Prophet

# pip install stldecompose
from stldecompose import decompose, forecast

stl = decompose(data['Adj. Close'])
stl.plot()
plt.show()

df.tail()

DF = df[['Adj. Close']].copy()
DF.reset_index(drop=False, inplace=True)
DF.rename(columns={'Date': 'ds', 'Adj. Close': 'y'}, inplace=True)
DF.tail()

# Split the series into the training and test sets:
train_indices = DF.ds.apply(lambda x: x.year) < 2017
X_train = DF.loc[train_indices].dropna()
X_test = DF.loc[~train_indices].reset_index(drop=True)
from stldecompose import decompose


def stl(train_set, test_set, params):
    # Decompose train + test together, then read the last len(test_set)
    # points of trend + seasonal back out as the forecast for the test window.
    complete_set = train_set + test_set
    decomposition = decompose(complete_set, period=365)
    forecast = [(decomposition.seasonal[i] + decomposition.trend[i])
                for i in range(0, len(decomposition.seasonal))]
    return forecast[-len(test_set):], None
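# Hedged usage sketch: because the decomposition above includes the test
# window, this baseline is an in-sample reconstruction rather than a true
# out-of-sample forecast.
import numpy as np

series = list(np.sin(2 * np.pi * np.arange(800) / 365))
preds, _ = stl(series[:730], series[730:], params=None)
print(len(preds))  # 70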
def stl(k):
    from stldecompose import decompose, forecast
    from stldecompose.forecast_funcs import naive
    from sklearn.metrics import mean_squared_error

    ar = new_tab.loc[(k)].values
    a = [ar[i][0] for i in range(len(ar))]

    def timeseries_df():
        index = pd.date_range(start="01-01-2017", periods=len(a), freq='W-SAT')
        ts = pd.DataFrame(a, index=index, columns=['num_orders'])
        ts['num_orders'] = a
        return ts

    ts = timeseries_df()
    X = ts.values

    # 60/40 train/test split.
    train_size = int(len(X) * 0.60)
    test_size = len(X) - train_size
    train, test = ts[0:train_size], ts[train_size:len(X)]

    trend = ['add', 'add', 'mul', 'mul']
    seasonal = ['add', 'mul', 'add', 'mul']

    decomp = decompose(train, period=7)
    fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)

    y_pred = [row[0] for row in fcast.values]
    y_true = [row[0] for row in test.values]
    Ferror = mean_squared_error(y_true, y_pred)
    return decomp, Ferror, test_size
tuning_conf = {
    'iterations': 2,
    'cv': 5,
    'scoring': 'r2',
}
m = sensingbee.ml_modeling.Model(estimator, tuning_conf).fit(X, y.loc[X.index])
print('R² = ', m.base_estimator.best_score_)
m.feature_importances_

# # Temporal (STL) features prototyping

import pandas as pd
from stldecompose import decompose

# Global STL
ts = y.groupby('Timestamp').median()
ts.index = pd.to_datetime(ts.index)
stl = decompose(ts, period=7)
Xt = pd.DataFrame()
Xt['NO2_trend'] = stl.trend['Value']
Xt['NO2_seasonal'] = stl.seasonal['Value']
Xt['NO2_diff'] = ts.diff().fillna(0)['Value']
Xt.index = X.index.get_level_values('Timestamp').unique()

# Per-sensor STL
idx = pd.IndexSlice
for s in X.index.get_level_values('Sensor Name').unique():
    x = X.loc[idx[s, :], 'NO2']
    ts = x.reset_index('Sensor Name', drop=True)
    ts.index = pd.to_datetime(ts.index)
    print(ts.shape, s)
    stl = decompose(ts, period=7)
    X.loc[x.index, 'NO2_trend'] = stl.trend
    X.loc[x.index, 'NO2_seasonal'] = stl.seasonal
#
# # The Data
#
# The [messages-per-hour] dataset contains the number of messages in each hour.
# There is seasonality over working hours and working days (MON-FRI).
# At '2019-07-28 15:00:00' there is an anomaly (too many messages).
# Depending on the value of alpha (for the moving STD), the anomaly can fall
# inside or outside the sleeve.
#
import pandas as pd
from statsmodels.tsa.seasonal import DecomposeResult
from stldecompose import decompose

df = pd.read_csv('data/messages-per-hour.csv', names=['Hour', 'Count'], parse_dates=True)
df['Hour'] = pd.to_datetime(df['Hour'])
df = df.set_index('Hour')

# Break the signal down into its STL parts
stl: DecomposeResult = decompose(df, period=24 * 7, lo_frac=0.7)
original = stl.observed
trend = stl.trend
seasonality = stl.seasonal
residual = stl.resid
sleeve_center = trend + seasonality

# Fixed-height sleeve
fixed_std = residual.std()
df['upper_bound'] = sleeve_center + fixed_std * 3
df['lower_bound'] = sleeve_center - fixed_std * 3
plot_draw.draw(df, 'df-fixed-std')

# Variable-height sleeve (MSTD with alpha=0.05)
moving_std = residual.ewm(alpha=0.05, min_periods=20, adjust=False).std()
df['upper_bound'] = sleeve_center + moving_std * 3
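# Hypothetical continuation (the source fragment ends above): complete the
# moving-std sleeve by mirroring the fixed-height version. plot_draw.draw and
# the 'df-moving-std' name are assumptions, not from the source.
df['lower_bound'] = sleeve_center - moving_std * 3
plot_draw.draw(df, 'df-moving-std')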