Example #1
def seasonal_esd(ts,
                 seasonality=None,
                 hybrid=False,
                 max_anomalies=10,
                 alpha=0.05):
    """
    Compute the Seasonal Extreme Studentized Deviate of a time series.
    The steps taken are first to decompose the time series using STL
    decomposition (trend, seasonality, residual). Then, calculate
    the Median Absolute Deviation (MAD) if hybrid (otherwise the median)
    and perform a regular ESD test on the residual, which we calculate as:
                    R = ts - seasonality - MAD or median
    Note: The statsmodels library requires a seasonality to compute the STL
    decomposition, hence the parameter seasonality. If none is given,
    it will automatically be set to 20% of the length of the
    timeseries.
    Args:
    ts (list or np.array): The timeseries to compute the ESD.
    seasonality (int): Number of time points for a season.
    hybrid (bool): See Twitter's research paper for difference.
    max_anomalies (int): The number of times the Grubbs' Test will be applied to the ts.
    alpha (float): The significance level.
    Returns:
    list of int: The indices of the anomalies in the timeseries.
    """
    ts = np.array(ts)
    seasonal = seasonality or int(
        0.2 * len(ts))  # Seasonality is 20% of the ts if not given.
    decomposition = decompose(ts, period=seasonal)
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = esd(residual,
                   max_anomalies=max_anomalies,
                   alpha=alpha,
                   hybrid=hybrid)
    return outliers
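A minimal usage sketch for seasonal_esd above, assuming numpy is available and that the decompose and esd helpers used inside the function are importable from the surrounding project; the synthetic series and the injected spike are made up for illustration.

import numpy as np

# Hypothetical data: ten repeats of a 24-point seasonal cycle plus noise
rng = np.random.default_rng(0)
ts = np.tile(np.sin(np.linspace(0, 2 * np.pi, 24)), 10) + rng.normal(0, 0.1, 240)
ts[57] += 5.0  # inject an obvious anomaly

# seasonality=24 matches the synthetic cycle; hybrid=True would switch to the MAD variant
anomalies = seasonal_esd(ts, seasonality=24, max_anomalies=5, alpha=0.05)
print(anomalies)  # expected to contain index 57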
def calculate_decomposition(data, model=MODEL_ADDITIVE, frequency=2):
    """
    Calculate time series decomposition

    Args:
        data (list[float]): Input time series values
        model (str): Seasonal component type
        frequency (int): Seasonal component frequency

    Returns:
        dict: Calculation results (trend, seasonal, residual components)
    """
    # Prepare data
    if model == MODEL_MULTIPLICATIVE:
        data = [log(x) for x in data]
    # Use model
    decomp = decompose(data, period=frequency)
    # Prepare result data
    if model == MODEL_MULTIPLICATIVE:
        decomp.trend = [exp(x) for x in decomp.trend]
        decomp.seasonal = [exp(x) for x in decomp.seasonal]
        decomp.resid = [exp(x) for x in decomp.resid]
    return {
        'trend': decomp.trend,
        'seasonal': decomp.seasonal,
        'resid': decomp.resid,
    }
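A short call sketch for calculate_decomposition. The MODEL_ADDITIVE and MODEL_MULTIPLICATIVE constants are not shown in the example, so the string values below are assumptions, and the weekly-seasonal input data is made up.

from math import exp, log          # dependencies of the function above
from stldecompose import decompose

# Assumed flag values; the original module's constants are not shown above
MODEL_ADDITIVE = 'additive'
MODEL_MULTIPLICATIVE = 'multiplicative'

# Hypothetical strictly positive series with a weekly pattern (frequency=7)
data = [10 + (i % 7) + 0.1 * i for i in range(140)]

additive = calculate_decomposition(data, model=MODEL_ADDITIVE, frequency=7)
multiplicative = calculate_decomposition(data, model=MODEL_MULTIPLICATIVE, frequency=7)
print(sorted(additive.keys()), len(multiplicative['trend']))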
Example #3
def get_imf(exp_numb,channel):
    samples = 100#728 #714, 724(4)
    path = "../data/imf_bpfo/sample{}_imfs.csv".format(samples)
    df = pd.read_csv(path)
    #new_df = df.loc[:,"imf1":"imf4"]
    #new_df.plot()
    #plt.show()
    #exit()
    k = 6
    #for k in range(1,9):
    s = df["imf{}".format(k)].values

    decomp = decompose(s)
    lim = 10000
    seasonality = decomp.seasonal[:lim]

    t = np.linspace(0,1, lim)
    plt.plot(t,seasonality)
    #data = get_experiment_bearing_data(exp_numb,channel)
    #k = 850
    #j = 8
    #signal = data[k]
    #imfs = get_imfs(signal)
    #imf = imfs[j]
    #decomp = decompose(imf)
    #pulse = decomp.seasonal
    #lim = 10000
    #plt.plot(pulse[:lim])
    plt.show()
Example #4
def series_decompose(series_df):
    '''
    Description: Runs the stldecompose decompose method on a time series from create_float_series, using 96 (the number of points per day in the database) as the period value
    Parameters: A pandas DataFrame time series
    Returns: a statsmodel object representing the decomposed series 
    '''
    decomped_series = decompose(series_df.values, period=96)
    return decomped_series
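Period 96 corresponds to daily seasonality at 15-minute resolution. A minimal call sketch, assuming pandas and numpy; the demo frame below is hypothetical (the function only uses .values, so any one-column frame works).

import numpy as np
import pandas as pd

# Hypothetical one week of 15-minute data (96 points per day)
idx = pd.date_range('2021-01-01', periods=96 * 7, freq='15min')
demo = pd.DataFrame({'value': np.sin(np.arange(96 * 7) * 2 * np.pi / 96)}, index=idx)

result = series_decompose(demo)
print(result.seasonal[:5])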
def _type_std(dataframe, type_count, type_id, length):
    seasonal_vals = np.zeros(
        (len(dataframe.columns) // type_count, len(dataframe)))
    for i in range(type_id, len(dataframe.columns), type_count):
        seasonal_vals[i // type_count] = stl.decompose(
            dataframe[i], period=length).seasonal.values
    type_std = np.mean(np.std(seasonal_vals, axis=0))
    return length, type_std
Example #6
def main():

    #load data

    e = exchange.Exchange('../../lib/binance.db')

    start = int(datetime.datetime(2018, 4, 1).timestamp() * 1000)
    end = int(datetime.datetime(2019, 5, 1).timestamp() * 1000)
    #end = int(datetime.datetime(2018, 5, 1).timestamp() * 1000)

    print('Loading order data...')

    number_of_orders, prices = e.get_total_orders_ts('BTCUSDT',
                                                     60 * 60 * 1000 * 6, start,
                                                     end)  #hourly data

    print('done')

    buy_orders = np.array([b for s, b in number_of_orders])
    sell_orders = np.array([s for s, b in number_of_orders])

    buy_orders[buy_orders <= 0] = np.mean(buy_orders)
    sell_orders[sell_orders <= 0] = np.mean(sell_orders)

    returns = np.array(calculate_returns(prices))

    returns_decomp = decompose(returns, period=24 * 7)

    sa_returns = returns_decomp.trend + returns_decomp.resid

    buy_orders_decomp = decompose(buy_orders, period=24 * 7)
    sa_buy_orders = buy_orders_decomp.trend + buy_orders_decomp.resid

    sell_orders_decomp = decompose(sell_orders, period=24 * 7)
    sa_sell_orders = sell_orders_decomp.trend + sell_orders_decomp.resid

    x, corrs = get_correlation_coef(
        buy_orders_decomp.resid - sell_orders_decomp.resid,
        returns_decomp.resid, 28, False)

    plt.bar(x, corrs)

    plt.show()
def plot_seasonal(df, tic, OUT_DIR):
    df_close = df[['date', 'close']].copy()
    df_close = df_close.set_index('date')

    decomp = decompose(df_close, period=365)
    fig = decomp.plot()
    fig.set_size_inches(20, 8)

    filename = OUT_DIR + tic + "_decompose.png"
    plt.savefig(filename, dpi=500)
def _type_std_seasonal(dataframe, type_count, length):
    # pylint: disable=no-member
    seasonal_vals = np.zeros((len(dataframe.columns), len(dataframe)))
    for i in range(len(dataframe.columns)):
        seasonal_vals[i] = stl.decompose(dataframe[i],
                                         period=length).seasonal.values
    type_stds = [
        np.mean(np.std(seasonal_vals[t::type_count], axis=0))
        for t in range(type_count)
    ]
    return length, np.mean(type_stds)
Example #9
    def stl_seasonal_decomposition(self):
        if self.has_validation_error:
            return
        # Decomposition based on stl - Package: stldecompose
        org_unit_group_stl = decompose(self.series, period=12)

        fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 9))
        self.series.plot(ax=ax1)
        org_unit_group_stl.trend.plot(ax=ax2)
        org_unit_group_stl.seasonal.plot(ax=ax3)
        org_unit_group_stl.resid.plot(ax=ax4)
        ax1.set_title("Vaccine Demand for {} in {}".format(
            self.vaccine, self.health_facility))
        ax2.set_title("Trend")
        ax3.set_title("Seasonality")
        ax4.set_title("Residuals")
        plt.tight_layout()
        plt.show()

        # Eliminating the seasonal component
        org_unit_group_adjusted = self.series - org_unit_group_stl.seasonal
        plt.figure(figsize=(12, 8))
        org_unit_group_adjusted.plot()
        plt.title(
            "Plot of Vaccine Demand of {} in {} without Seasonal Component".
            format(self.vaccine, self.health_facility))
        plt.show()
        #
        # Getting the seasonal component only
        # Seasonality gives structure to the data
        plt.figure(figsize=(12, 8))
        org_unit_group_stl.seasonal.plot()
        plt.title(
            "Plot of Seasonal Component of Vaccine Demand of {} in {}".format(
                self.vaccine, self.health_facility))
        plt.show()

        # Creating a forecast based on STL
        stl_fcast = forecast(org_unit_group_stl,
                             steps=12,
                             fc_func=seasonal_naive,
                             seasonal=True)

        # Plot of the forecast and the original data
        plt.figure(figsize=(12, 8))
        plt.plot(self.series, label='BCG Wastage Rate')
        plt.plot(stl_fcast, label=stl_fcast.columns[0])
        plt.title(
            "Plot of Vaccine Demand of {} in {} Next Year Forecast".format(
                self.vaccine, self.health_facility))
        plt.legend()
        plt.show()
Example #10
    def detrend(self, label=None):
        """Returns the timeseries without trend component

            Args:
                label (optional): Label for the returned timeseries; defaults to the original label.
            Returns:
                A FixedIndexTimeseries Object

            Raises:
                None
            """
        dec = decompose(self.timeseries.values, period=self.maxindex)
        detrended = (pandas.Series(dec.resid, index=self.timeseries.index)
                     + pandas.Series(dec.seasonal, index=self.timeseries.index))
        return FixedIndexTimeseries(detrended, mode=self.mode,
                                    label=self.label if label is None else label)
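The method keeps only the seasonal and residual components, which is equivalent to subtracting the trend. A standalone pandas sketch of the same idea, independent of the FixedIndexTimeseries class (the series and period below are hypothetical):

import numpy as np
import pandas as pd
from stldecompose import decompose

# Hypothetical monthly series with an upward trend and yearly seasonality
idx = pd.date_range('2015-01-01', periods=120, freq='MS')
series = pd.Series(np.linspace(0, 10, 120) + np.sin(np.arange(120) * 2 * np.pi / 12),
                   index=idx)

dec = decompose(series.values, period=12)
detrended = pd.Series(dec.resid, index=idx) + pd.Series(dec.seasonal, index=idx)
print(detrended.head())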
def createDataset_uber(look_back):
    scaler = preprocessing.MinMaxScaler()
    df = pd.read_csv(DATASET_STATES_CSV,
                     index_col=0,
                     parse_dates=True,
                     encoding="utf-8")
    df = normalize_df(df, DATA_SCALER())
    df += 1
    df = df.apply(np.log)
    df2 = df.copy()
    for col in df:
        decomp = decompose(df[col], period=12)
        df[col] = decomp.trend + decomp.resid
        df2[col + '_t'] = decomp.trend
        df2[col + '_s'] = decomp.seasonal
        df2[col + '_r'] = decomp.resid
    res = []
    for col in df:
        x, y, offsets, x_s, y_s = create_dataset_uberSc(
            df[col], df2[col + '_t'], df2[col + '_s'], look_back)
        x_test = np.copy(x[-FORECASTING_STEPS:])
        y_test = np.copy(y[-FORECASTING_STEPS:])
        offsets_test = np.copy(offsets[-FORECASTING_STEPS:])
        x_s_test = np.copy(x_s[-FORECASTING_STEPS:])
        y_s_test = np.copy(y_s[-FORECASTING_STEPS:])
        x = x[:-FORECASTING_STEPS]
        y = y[:-FORECASTING_STEPS]
        x_s = x_s[:-FORECASTING_STEPS]
        y_s = y_s[:-FORECASTING_STEPS]
        offsets = offsets[:-FORECASTING_STEPS]
        x_val = np.copy(x[-1:])
        y_val = np.copy(y[-1:])
        x_s_val = np.copy(x_s[-1:])
        y_s_val = np.copy(y_s[-1:])
        offsets_val = np.copy(offsets[-1:])
        x_train = np.copy(x[:-1])
        y_train = np.copy(y[:-1])
        x_s_train = np.copy(x_s[:-1])
        y_s_train = np.copy(y_s[:-1])
        offsets_train = np.copy(offsets[:-1])
        #y_test = test
        #x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.33, random_state=RANDOM_SEED)
        res.append((x_train, x_val, x_test, y_train, y_val, y_test,
                    offsets_train, offsets_val, offsets_test, x_s_train,
                    x_s_val, x_s_test, y_s_train, y_s_val, y_s_test))
    return res
Example #12
    def trend(data, period, yl):
        stl = decompose(data, period=len(data))
        plt.figure(facecolor='#ffffff')
        plt.gcf().clear()
        plt.plot(stl.trend, 'o-', marker='o', color='b')
        plt.xticks(rotation=30)
        plt.grid(True, color='#e5e5cc', linestyle='-', linewidth=1)
        plt.xlabel(None)
        plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
        plt.title('Sales Trends', y=1.05, color='#630b0b', fontsize=21)
        plt.tight_layout()

        #         img = io.BytesIO()
        #         plt.savefig(img, format = 'png')
        #         plot = base64.b64encode(img.getvalue()).decode()
        #         return plot
        return plt.show
Example #13
    def create_model(Date):
        i=totalMeals
        ts = timeseries_df(i,Date)
        X = ts.values

        #STL
        modelSTL,errorSTL=stl(X,ts)

        #ETS
        modelETS,errorETS=ets(X)

        #Comparing errors of ETS and STL
        print(errorSTL)
        print(errorETS)
        error=min(errorSTL,errorETS)
        if(error==errorSTL):
            FinalModel=modelSTL
            FModel='STL'
            print("STL")
        elif(error==errorETS):
            FinalModel=modelETS
            FModel='ETS'
            print("ETS")

        #If STL Model is appropriate
        if(FModel=='STL'):
            from stldecompose import decompose, forecast
            globals()['STL%s' % i] = FinalModel
            STL.append(i)
            FinalModel = decompose(ts, period=7)    
            joblib.dump(FinalModel, 'STL'+ str(i) +'.xml', compress=1)

        #If ETS Model is appropriate
        elif(FModel=='ETS'):
            globals()['ETS%s' % i] = FinalModel
            ETS.append(i)
            if(modelETS==1):
                FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add', seasonal='add',damped=True).fit(use_boxcox=True)
            if(modelETS==2):
                FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add', seasonal='mul',damped=True).fit(use_boxcox=True)
            if(modelETS==3):
                FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul', seasonal='add',damped=True).fit(use_boxcox=True)
            if(modelETS==4):
                FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul', seasonal='mul',damped=True).fit(use_boxcox=True)
            joblib.dump(FinalModel, 'ETS'+ str(i) +'.xml', compress=1)
Example #14
def seasonal_esd(ts,
                 seasonality=None,
                 hybrid=False,
                 max_anomalies=10,
                 alpha=0.05):
    """
    Compute the Seasonal Extreme Studentized Deviate of a time series; the hybrid flag controls whether the MAD-based variant of the ESD test is used.
    """
    ts = np.array(ts)
    seasonal = seasonality or int(
        0.2 * len(ts))  # Seasonality is 20% of the ts if not given.
    decomposition = decompose(ts, period=seasonal)
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = esd(residual,
                   max_anomalies=max_anomalies,
                   alpha=alpha,
                   hybrid=hybrid)
    return outliers
Example #15
def burst_amplitude():
    amplitudes = []
    path = "../data/imf_bpfi/"
    files = glob("{}/*".format(path))
    for sample_k, file in enumerate(files):
        print("processing file {}".format(sample_k+1))
        df = pd.read_csv(file)
        columns  = list(df.columns)
        temp_list = []
        temp_dict = {}
        for j, column_name in enumerate(columns):
            imf = list(df[column_name])
            decomp = decompose(imf)
            seasonality = decomp.seasonal
            col = "slt_of_imf{}".format(j+1)
            temp_list.append((col,seasonality))
        temp_dict = dict(temp_list)
        temp_df = pd.DataFrame(temp_dict)
        temp_df.to_csv("../data/stl_bpfi/sample{}_stl.csv".format(sample_k))
Example #16
def stl_decompose(series, period=None):
    """
        Decompose ts using STL - Seasonal and Trend decomposition using Loess

        series - ts data
        period - (largest) period of seasonality
    """
    import stldecompose as stl

    result = stl.decompose(series, period=period)

    fig, axis = plt.subplots(4, 1, figsize=(15, 10))

    result.observed.plot(ax=axis[0], title='Observed')
    result.trend.plot(ax=axis[1], title='Trend')
    result.seasonal.plot(ax=axis[2], title='Seasonal')
    result.resid.plot(ax=axis[3], title='Residual')

    plt.tight_layout()
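A call sketch for stl_decompose above. The function plots the four components, so it expects something with a .plot() method on each; passing a pandas Series with a DatetimeIndex (as below, with made-up hourly data) keeps the plotting calls working.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hypothetical month of hourly data with a daily cycle
idx = pd.date_range('2020-01-01', periods=24 * 30, freq='H')
demo = pd.Series(np.sin(np.arange(24 * 30) * 2 * np.pi / 24)
                 + np.random.normal(0, 0.2, 24 * 30), index=idx)

stl_decompose(demo, period=24)
plt.show()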
Example #17
    def stl(X,ts):
        print("Entering STL")
        from stldecompose import decompose, forecast
        train_size = int(len(X) * 0.90)
        test_size = len(X)-train_size
        train, test = ts[0:train_size], ts[train_size:len(X)]
        
        decomp = decompose(train, period=7)    
        fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)

        #Error Calculation
        y_pred=[]
        for i in fcast.values:
            y_pred.append(i[0])
        y_true=[]
        for i in test.values:
            y_true.append(i[0])
        Ferror=mean_squared_error(y_true, y_pred)
        print("Leaving STL")
        return decomp,Ferror
Example #18
    def season(data, period, yl):
        # v = int(len(data)//(len(data)//4))
        stl = decompose(data, period=period)
        plt.gcf().clear()
        plt.plot(stl.seasonal, '-', color='b')

        # for i,j in zip(stl.seasonal.index, stl.seasonal.values):
        #     plt.annotate(str(j),xy=(i,j))

        plt.xticks(rotation=30)
        plt.grid(True)
        plt.xlabel(None)
        plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
        plt.title('Sales Seasonality', y=1.05, color='#630b0b', fontsize=21)
        plt.tight_layout()

        #         img = io.BytesIO()
        #         plt.savefig(img, format = 'png')
        #         plot = base64.b64encode(img.getvalue()).decode()
        return plt.show
Example #19
def decomposition_plot(series_data, period):
    """
    decomposition of the original signal for preliminary analysis

    Args:
        series_data: Pandas Series object
        period: estimated seasonal frequency
    """
    # if the time series is not a Series object, convert it to one
    if not isinstance(series_data, pd.Series):
        series_data = pd.Series(series_data)
    # naive additive decomposition
    decomp = seasonal_decompose(series_data.values,
                                model='additive',
                                freq=period)
    decomp.plot()

    # stl decompose
    stl = decompose(series_data.values, period=period)
    stl.plot()
    plt.show()
    plt.pause(0.01)
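A usage sketch for decomposition_plot, with a made-up daily series that has a weekly cycle; the period argument is the estimated seasonal frequency mentioned in the docstring.

import numpy as np
import pandas as pd

# Hypothetical daily data: mild upward trend plus a weekly cycle
demo = pd.Series(0.05 * np.arange(200) + np.sin(np.arange(200) * 2 * np.pi / 7))

decomposition_plot(demo, period=7)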
    def decomposeSeries(self, ts, decompType=None):
        decomp = None
        if decompType is None:
            decompType = self.decompType.currentText()

        if decompType == "Additive":
            from statsmodels.tsa.seasonal import seasonal_decompose
            decomp = seasonal_decompose(ts, model="additive", freq=96)

        elif decompType == "Multiplicative":
            from statsmodels.tsa.seasonal import seasonal_decompose
            try:
                decomp = seasonal_decompose(ts,
                                            model="multiplicative",
                                            freq=96)
            except:
                decomp = None

        elif decompType == "Loess (STL)":
            from stldecompose import decompose
            decomp = decompose(ts, period=96)

        return decomp
Example #21
def automation_single_ts_arma_analysis(original_df, smoothed_df, smooth_type,
                                       inclusion, stationarity):
    from statsmodels.tsa.arima_model import ARIMA
    from math import ceil
    import numpy as np
    import pandas as pd
    from functions import goodness_prediction_interval, forecast_pred_int, prediction_error

    if smooth_type == 'normal':
        ts = original_df
    else:
        ts = smoothed_df

    if (stationarity == True):
        ###Split the time series dataset into training and testing################
        ts_train = ts[0:ceil(len(ts) * 0.9)]
        ts_test = ts[ceil(len(ts) * 0.9):]

        #find the best ordered ARMA model
        best_hqic = np.inf
        best_order = None
        best_mdl = None

        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values,
                                        order=(p, d, q)).fit(method='mle',
                                                             trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except:
                        continue
        #print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        # The .plot_predict function has a problem.
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])
        #ts_predict =  best_mdl.predict(start = ts_test.index[0].to_pydatetime(), end = ts_test.index[-1].to_pydatetime())
        #ts_predict = best_mdl.predict(start = ts.index.get_loc(pd.to_datetime(firstdate)), end = ts.index.get_loc(pd.to_datetime(lastdate)))

        ### Calculate the prediction interval.
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(
            len(ts_test))

    else:
        #####remove trend and seasonality from the time series.#################
        from stldecompose import decompose, forecast
        from stldecompose.forecast_funcs import (naive, drift, mean,
                                                 seasonal_naive)

        #########################If the length of the ts is shorter than 130#####
        ########################This is weekly data#############################
        if len(ts) < 130:
            stl = decompose(ts, period=52)
        else:
            if (inclusion == False):
                stl = decompose(ts, period=251)
            else:
                stl = decompose(ts, period=365)

        ######Fit ARMA on the Residual##############
        ts_train = stl.resid[0:ceil(len(stl.resid) * 0.9)]
        ts_test = stl.resid[ceil(len(stl.resid) * 0.9):]

        best_hqic = np.inf
        best_order = None
        best_mdl = None

        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values,
                                        order=(p, d, q)).fit(method='mle',
                                                             trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except:
                        continue
        #print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        #######Prediction#################
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])

        #ts_predict =  best_mdl.predict(start = ts_test.index[0].to_pydatetime(), end = ts_test.index[-1].to_pydatetime())
        ts_predict = best_mdl.predict(
            start=ts.index.get_loc(pd.to_datetime(firstdate)),
            end=ts.index.get_loc(pd.to_datetime(lastdate)))

        #######Add back the trend and seasonality ########
        ts_predict = stl.seasonal.units.loc[ts_test.index[0].to_pydatetime(
        ):ts_test.index[-1].to_pydatetime(
        )] + stl.trend.units.loc[ts_test.index[0].to_pydatetime(
        ):ts_test.index[-1].to_pydatetime()] + pd.Series(index=ts_test.index,
                                                         data=ts_predict)

        #########Compute the prediction interval
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(
            len(ts_test))
        difference = stl.seasonal.units.loc[ts_test.index[0].to_pydatetime(
        ):ts_test.index[-1].to_pydatetime()] + stl.trend.units.loc[
            ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]

        def f(a):
            return (a + difference)

        prediction_interval = np.apply_along_axis(f, 0, prediction_interval)

    ########Compute the prediction error#############
    pe = prediction_error(ts_test.units,
                          ts_forecast,
                          original_df=original_df,
                          smooth_type=smooth_type)

    #######Assess the goodness of prediction interval########################
    acc_pi, avg_diff_pi = goodness_prediction_interval(ts_test,
                                                       prediction_interval)

    #    ############Plot the prediction and prediction intervals###################
    #    from func_visualisation import plot_prediction
    #    plot_prediction(df, prediction, prediction_interval)
    #

    return best_order, pe, acc_pi, avg_diff_pi
Example #22
    decomposition, hence the parameter seasonality. If none is given,
    then it will automatically be calculated to be 20% of the total
    timeseries.
    Args:
    ts (list or np.array): The timeseries to compute the ESD.
    seasonality (int): Number of time points for a season.
    hybrid (bool): See Twitter's research paper for difference.
    max_anomalies (int): The number of times the Grubbs' Test will be applied to the ts.
    alpha (float): The significance level.
    rtype:
    list int: The indices of the anomalies in the timeseries.
    """
    ts = np.array(ts)
    # Seasonality is 20% of the ts if not given.
    seasonal = seasonality or int(0.2 * len(ts))
    decomposition = decompose(ts, period=seasonal)
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = esd(residual, max_anomalies=max_anomalies,
                   alpha=alpha, hybrid=hybrid)
    return outliers


def esd(ts, max_anomalies=10, alpha=0.05, hybrid=False):
    """
    Compute the Extreme Studentized Deviate of a time series.
    A Grubbs Test is performed max_anomalies times with the caveat
       that each time the top value is removed. For more details visit
       http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm
    Args:
        ts (list or np.array): The time series to compute the ESD.
        max_anomalies (int): The number of times the Grubbs' Test will be applied to the ts.
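The docstring above (clipped in this excerpt) describes the generalized ESD procedure: a Grubbs-style test applied up to max_anomalies times, removing the most extreme point each round. A minimal standalone sketch of that procedure follows, assuming scipy is available; it is an illustration of the NIST description, not the library's actual esd implementation (which also supports the hybrid/MAD variant).

import numpy as np
from scipy.stats import t as t_dist


def esd_sketch(ts, max_anomalies=10, alpha=0.05):
    """Hypothetical generalized-ESD helper following the NIST description."""
    series = np.asarray(ts, dtype=float).copy()
    n = len(series)
    indices = list(range(n))        # original positions of the remaining points
    removed, stats, crits = [], [], []
    for i in range(1, max_anomalies + 1):
        # Grubbs-style statistic for the most extreme remaining point
        deviations = np.abs(series - series.mean())
        idx = int(np.argmax(deviations))
        stats.append(deviations[idx] / series.std(ddof=1))
        # Two-sided critical value for round i
        p = 1 - alpha / (2 * (n - i + 1))
        t_crit = t_dist.ppf(p, n - i - 1)
        crits.append((n - i) * t_crit /
                     np.sqrt((n - i - 1 + t_crit ** 2) * (n - i + 1)))
        # Drop the most extreme point and record its original index
        removed.append(indices[idx])
        series = np.delete(series, idx)
        del indices[idx]
    # Number of anomalies: largest round whose statistic exceeds its critical value
    num = max((i + 1 for i in range(len(stats)) if stats[i] > crits[i]), default=0)
    return removed[:num]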
Example #23
plt.show()

AirP["Residual"] = AirP["Season"] - AirP["Season_ave"]
AirP["Residual"].plot()
plt.show()


seasonal_decompose(AirP["Passengers"], model = "additive", freq = 12).plot()
plt.show()

seasonal_decompose(np.log(AirP["Passengers"]), model = "add").resid.plot()
plt.show()


from stldecompose import decompose
decompose(np.log(AirP["Passengers"]), period = 12).plot();

# =============================================================================
# Quarterly beer production in Australia (in megaliters) between
# March 1956 and June 1994
# =============================================================================
AusBeer = pd.read_csv("data/AustralianBeer.csv", sep=";")
AusBeer.head()

AusBeer1 = AusBeer.copy()
AusBeer1.head()

AusBeer1["Quarter"] = pd.DatetimeIndex(AusBeer1["Quarter"])

AusBeer1.set_index("Quarter", inplace = True)
AusBeer1.head()
def get_decomposition(insample_data, p):
    dec = decompose(insample_data, period=p)
    return dec.trend, dec.seasonal, dec.resid
Example #25
def main():
    '''
    Main function that generates the result.
    '''
    # load data
    data = pd.read_csv(args.excep_train, parse_dates=["SHIFT_DATE"])
    # create train, val, and test
    train = data[(data["SHIFT_DATE"] > "2012-12-31")
                 & (data["SHIFT_DATE"] < "2018-01-01")]
    val = data[(data["SHIFT_DATE"] > "2017-12-31")
               & (data["SHIFT_DATE"] < "2019-01-01")]

    # using only a portion of the sites
    train_clean = train[(train["SITE"] == "St Paul's Hospital") |
                        (train["SITE"] == "Mt St Joseph") |
                        (train["SITE"] == "Holy Family") |
                        (train["SITE"] == "SVH Langara") |
                        (train["SITE"] == "Brock Fahrni") |
                        (train["SITE"] == "Youville Residence")]
    train_clean = train_clean[(train_clean["JOB_FAMILY"] == "DC1000") |
                              (train_clean["JOB_FAMILY"] == "DC2A00") |
                              (train_clean["JOB_FAMILY"] == "DC2B00")]

    val_clean = val[(val["SITE"] == "St Paul's Hospital") |
                    (val["SITE"] == "Mt St Joseph") |
                    (val["SITE"] == "Holy Family") |
                    (val["SITE"] == "SVH Langara") |
                    (val["SITE"] == "Brock Fahrni") |
                    (val["SITE"] == "Youville Residence")]
    val_clean = val_clean[(val_clean["JOB_FAMILY"] == "DC1000") |
                          (val_clean["JOB_FAMILY"] == "DC2A00") |
                          (val_clean["JOB_FAMILY"] == "DC2B00")]

    # create training dataframes
    splitting_train = train_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM",
         "SHIFT_DATE"]).size().reset_index()
    splitting_train = splitting_train.rename({
        "SHIFT_DATE": "ds",
        0: "y"
    },
                                             axis=1)

    # create validation dataframes
    splitting_val = val_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM",
         "SHIFT_DATE"]).size().reset_index()
    splitting_val = splitting_val.rename({"SHIFT_DATE": "ds", 0: "y"}, axis=1)

    # create timeframe data for prediction
    total_timeframe = pd.DataFrame(
        pd.date_range(start='2013-01-01', end='2017-12-31',
                      freq="D")).rename({0: "ds"}, axis=1)
    timeframe = pd.DataFrame(
        pd.date_range(start='2018-01-01', end='2018-12-31',
                      freq="D")).rename({0: "ds"}, axis=1)

    # unique combinations
    sites = train_clean["SITE"].unique()
    job_families = train_clean["JOB_FAMILY"].unique()
    sub_programs = train_clean["SUB_PROGRAM"].unique()

    # create and store predictions and true results
    models = {}
    split_data = {}
    pred_results_past = {}
    pred_results_future = {}
    true_results = {}
    for i in sites:
        for j in job_families:
            for k in sub_programs:
                temp_data_train = splitting_train[
                    (splitting_train["SITE"] == i)
                    & (splitting_train["JOB_FAMILY"] == j) &
                    (splitting_train["SUB_PROGRAM"] == k)].reset_index()
                temp_data_train = pd.merge(total_timeframe,
                                           temp_data_train,
                                           on="ds",
                                           how="outer")
                temp_data_train["y"] = temp_data_train["y"].fillna(0)

                temp_data_val = splitting_val[
                    (splitting_val["SITE"] == i)
                    & (splitting_val["JOB_FAMILY"] == j) &
                    (splitting_val["SUB_PROGRAM"] == k)].reset_index(drop=True)
                temp_data_val = pd.merge(timeframe,
                                         temp_data_val,
                                         on="ds",
                                         how="outer")
                temp_data_val["y"] = temp_data_val["y"].fillna(0)

                split_data[(i, j, k)] = temp_data_train
                true_results[(i, j, k)] = temp_data_val
                if temp_data_val["y"].sum() >= 300.0:
                    pred_results_past[(i, j,
                                       k)], models[(i, j, k)] = run_prophet(
                                           temp_data_train, total_timeframe)
                    pred_results_future[(i, j,
                                         k)] = models[(i, j,
                                                       k)].predict(timeframe)
                    print("Fitting -", i, j, k, ": Done")

    # combine predictions and true results
    combined = {}
    for i in pred_results_future:
        combined[i] = pd.merge(
            true_results[i], pred_results_future[i], on="ds",
            how="outer")[["ds", "y", "yhat", "yhat_lower", "yhat_upper"]]

    # convert to week and calculating errors weekly
    weekly = {}
    for i in combined:
        # create week column
        combined[i]["ds"] = combined[i]["ds"] - pd.DateOffset(weekday=0,
                                                              weeks=1)
        combined[i]["week"] = combined[i]["ds"].dt.week

        # store y, yhat, yhat_lower, yhat_upper
        weekly_y = combined[i].groupby("ds").y.sum().reset_index()
        weekly_yhat = combined[i].groupby("ds").yhat.sum().astype(
            int).reset_index()
        weekly_yhat_lower = combined[i].groupby("ds").yhat_lower.sum().astype(
            int).reset_index()
        weekly_yhat_upper = combined[i].groupby("ds").yhat_upper.sum().astype(
            int).reset_index()

        # replace negative prediction values with 0
        weekly_yhat = weekly_yhat.where(weekly_yhat["yhat"] >= 0, 0)
        weekly_yhat_lower = weekly_yhat_lower.where(
            weekly_yhat_lower["yhat_lower"] >= 0, 0)
        weekly_yhat_upper = weekly_yhat_upper.where(
            weekly_yhat_upper["yhat_upper"] >= 0, 0)

        # merge weekly results
        weekly[i] = pd.concat([
            weekly_y, weekly_yhat["yhat"], weekly_yhat_lower["yhat_lower"],
            weekly_yhat_upper["yhat_upper"]
        ],
                              axis=1)

        # create columns "year", "site", "job_family", "sub_program"
        length = weekly[i].shape[0]
        weekly[i]["week"] = weekly[i]["ds"].dt.weekofyear
        weekly[i]["site"] = np.repeat(i[0], length)
        weekly[i]["job_family"] = np.repeat(i[1], length)
        weekly[i]["sub_program"] = np.repeat(i[2], length)

    # model residuals
    for i in weekly:
        forecasted = pred_results_past[i]
        actual = split_data[i]

        error = actual["y"] - forecasted["yhat"]
        obs = total_timeframe.copy()
        obs["error"] = error
        obs = obs.set_index("ds")

        decomp = decompose(obs, period=365)
        weekly_fcast = forecast(decomp,
                                steps=365,
                                fc_func=drift,
                                seasonal=True)
        weekly_fcast["week"] = weekly_fcast.index - pd.DateOffset(weekday=0,
                                                                  weeks=1)
        weekly_fcast = weekly_fcast.groupby("week").sum()

        resid_fcast = weekly_fcast.reset_index()["drift+seasonal"]
        weekly_yhat = (weekly[i]["yhat"] + resid_fcast).round(0)
        weekly_yhat_lower = (weekly[i]["yhat_lower"] + resid_fcast).round(0)
        weekly_yhat_upper = (weekly[i]["yhat_upper"] + resid_fcast).round(0)

        weekly[i]["yhat"] = weekly_yhat.where(weekly_yhat >= 0, 0)
        weekly[i]["yhat_lower"] = weekly_yhat_lower.where(
            weekly_yhat_lower >= 0, 0)
        weekly[i]["yhat_upper"] = weekly_yhat_upper.where(
            weekly_yhat_upper >= 0, 0)

    # create data/predictions folder if it doesn't exist
    predictions_path = "../data/predictions/"
    if not os.path.exists(predictions_path):
        os.mkdir(predictions_path)

    # export to "data/predictions/" directory
    total_data = pd.DataFrame()
    for i in weekly:
        total_data = pd.concat([total_data, weekly[i]], axis=0)
    total_data.to_csv(predictions_path + "exception_predictions.csv")
Example #26
plt.rc('figure',figsize=(14,6))
plt.rc('font',size=13)


result = seasonal_decompose(data['Adj. Close'],freq=252, 
                            model='additive')
result.plot()
plt.show()

from fbprophet import Prophet

# pip install stldecompose  (shell command; run once outside this script)

from stldecompose import decompose, forecast
stl = decompose(data['Adj. Close'])
stl.plot()
plt.show()

df.tail()

DF = df[['Adj. Close']].copy()
DF.reset_index(drop=False, inplace=True)
DF.rename(columns={'Date': 'ds', 'Adj. Close': 'y'}, inplace=True)
DF.tail()

#Split the series into the training and test sets:
train_indices = DF.ds.apply(lambda x: x.year) < 2017
X_train = DF.loc[train_indices].dropna()
X_test = DF.loc[~train_indices].reset_index(drop=True)
Example #27
def stl(train_set, test_set, params):
    complete_set = train_set + test_set
    decomposition = decompose(complete_set, period=365)
    forecast = [(decomposition.seasonal[i] + decomposition.trend[i]) for i in range(0, len(decomposition.seasonal))]
    return forecast[-len(test_set):], None
def stl(k):
    from stldecompose import decompose, forecast
    ar=new_tab.loc[(k)].values
    #print(len(ar))
    a=[]
    for i in range(len(ar)):
        a.append(ar[i][0])
    
    def timeseries_df():
        index = pd.date_range(start="01-01-2017", periods=len(a), freq='W-SAT')
        ts = pd.DataFrame(a, index=index, columns=['num_orders'])
        ts['num_orders']=a
        return ts
    
    
    ts = timeseries_df()
    #print(ts)
    #print(ts.index)
    X = ts.values
    train_size = int(len(X) * 0.60)
    test_size = len(X)-train_size
    #print(test_size," ", train_size)
    #training, testing = ts[0:train_size], ts[train_size:len(X)]
    train, test = ts[0:train_size], ts[train_size:len(X)]
    #print(train)
    #print(test)
    #print('Observations: %d' % (len(X)))
    #print('Training Observations: %d' % (len(train)))
    #print('Testing Observations: %d' % (len(test)))

    trend=['add','add','mul','mul']
    seasonal=['add','mul','add','mul']
    
    
    #print(test)
    
    decomp = decompose(train, period=7)
    #print(decomp)
    #print(type(decomp))
    #s=sm.tsa.seasonal_decompose(train)
    
    #print("trend")
    #print(decomp.trend)
    #print(decomp.resid)
    
    #print("season")
    #print(decomp.seasonal)
    
    fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)
    #print(fcast)
    y_pred=[]
    for i in fcast.values:
        y_pred.append(i[0])
    #print(y_pred)
    y_true=[]
    for i in test.values:
        y_true.append(i[0])
    #print(y_true)
    Ferror=mean_squared_error(y_true, y_pred)
    
    return decomp,Ferror,test_size
Example #29
                    'iterations': 2, 'cv':5, 'scoring':'r2'}
m = sensingbee.ml_modeling.Model(estimator, tuning_conf).fit(X, y.loc[X.index])
print('R² = ',m.base_estimator.best_score_)
m.feature_importances_


#
# Temporal (STL) features prototyping
#
import pandas as pd
from stldecompose import decompose

# Global STL
ts = y.groupby('Timestamp').median()
ts.index = pd.to_datetime(ts.index)
stl = decompose(ts, period=7)
Xt = pd.DataFrame()
Xt['NO2_trend'] = stl.trend['Value']
Xt['NO2_seasonal'] = stl.seasonal['Value']
Xt['NO2_diff'] = ts.diff().fillna(0)['Value']
Xt.index = X.index.get_level_values('Timestamp').unique()

idx = pd.IndexSlice
for s in X.index.get_level_values('Sensor Name').unique():
    x = X.loc[idx[s,:],'NO2']
    ts = x.reset_index('Sensor Name',drop=True)
    ts.index = pd.to_datetime(ts.index)
    print(ts.shape, s)
    stl = decompose(ts, period=7)
    X.loc[x.index, 'NO2_trend'] = stl.trend
    X.loc[x.index, 'NO2_seasonal'] = stl.seasonal
Example #30
#
#
# The Data
#
# The [messages-per-hour] contains number of messages in an hour.
# There is a seasonality for working hours and working days (MON-FRI).
# At '2019-07-28 15:00:00' there is an anomaly (too many messages).
# Depending on the value of alpha (for moving STD) the anomaly can be in or out of the sleeve.
#

df = pd.read_csv('data/messages-per-hour.csv', names=['Hour', 'Count'], parse_dates=True)
df['Hour'] = pd.to_datetime(df['Hour'])
df = df.set_index('Hour')

# Break down the signal into STL parts
stl: DecomposeResult = decompose(df, period=24*7, lo_frac=0.7)
original = stl.observed
trend = stl.trend
seasonality = stl.seasonal
residual = stl.resid
sleeve_center = trend + seasonality

# Fixed Height Sleeve
fixed_std = residual.std()
df['upper_bound'] = sleeve_center + fixed_std * 3
df['lower_bound'] = sleeve_center - fixed_std * 3
plot_draw.draw(df, 'df-fixed-std')
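
# With the fixed sleeve computed, points outside it can be flagged directly.
# A small sketch (assumes the bound columns above are aligned with 'Count'):
outside = df[(df['Count'] > df['upper_bound']) | (df['Count'] < df['lower_bound'])]
print(outside.index)  # expected to include the spike near 2019-07-28 15:00:00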

# Variant Height Sleeve (MSTD with alpha=0.05)
moving_std = residual.ewm(alpha=0.05, min_periods=20, adjust=False).std()
df['upper_bound'] = sleeve_center + moving_std * 3