import numpy as np
from stldecompose import decompose


def seasonal_esd(ts, seasonality=None, hybrid=False, max_anomalies=10, alpha=0.05):
    """
    Compute the Seasonal Extreme Studentized Deviate of a time series.

    The steps taken are first to decompose the time series into an STL
    decomposition (trend, seasonality, residual). Then, calculate the Median
    Absolute Deviation (MAD) if hybrid (otherwise the median) and perform a
    regular ESD test on the residual, which we calculate as:

        R = ts - seasonality - MAD or median

    Note: The statsmodels library requires a period to compute the STL
    decomposition, hence the parameter seasonality. If none is given, it
    defaults to 20% of the length of the time series.

    Args:
        ts (list or np.array): The time series on which to compute the ESD.
        seasonality (int): Number of time points per season.
        hybrid (bool): See Twitter's research paper for the difference.
        max_anomalies (int): The number of times the Grubbs' Test will be
            applied to the ts.
        alpha (float): The significance level.

    Returns:
        list of int: The indices of the anomalies in the time series.
    """
    ts = np.array(ts)
    seasonal = seasonality or int(0.2 * len(ts))  # Seasonality defaults to 20% of the ts.
    decomposition = decompose(ts, period=seasonal)
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = esd(residual, max_anomalies=max_anomalies, alpha=alpha, hybrid=hybrid)
    return outliers
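# A minimal usage sketch for seasonal_esd, assuming the esd() helper it calls
# is defined later in this file. A weekly-seasonal series is synthesized and a
# single spike is injected; its index should be among the returned anomalies.
import numpy as np

np.random.seed(0)
t = np.arange(365)
series = 10 + np.sin(2 * np.pi * t / 7) + np.random.normal(0, 0.2, len(t))
series[100] += 8  # injected anomaly
print(seasonal_esd(series, seasonality=7, max_anomalies=5))  # expect index 100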
from math import exp, log

from stldecompose import decompose

# Module-level constants assumed by this helper.
MODEL_ADDITIVE = 'additive'
MODEL_MULTIPLICATIVE = 'multiplicative'


def calculate_decomposition(data, model=MODEL_ADDITIVE, frequency=2):
    """
    Calculate a time series decomposition.

    Args:
        data (list[float]): Input time series values.
        model (str): Seasonal component type (additive or multiplicative).
        frequency (int): Seasonal component frequency.

    Returns:
        dict: Calculation results (trend, seasonal, residual components).
    """
    # A multiplicative model is handled by decomposing the log-transformed
    # series additively, then exponentiating the components back.
    if model == MODEL_MULTIPLICATIVE:
        data = [log(x) for x in data]

    decomp = decompose(data, period=frequency)

    if model == MODEL_MULTIPLICATIVE:
        decomp.trend = [exp(x) for x in decomp.trend]
        decomp.seasonal = [exp(x) for x in decomp.seasonal]
        decomp.resid = [exp(x) for x in decomp.resid]

    return {
        'trend': decomp.trend,
        'seasonal': decomp.seasonal,
        'resid': decomp.resid,
    }
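# Hedged usage sketch for calculate_decomposition: a short multiplicative
# series (three repeats of a 12-point seasonal pattern with mild growth).
pattern = [112.0, 118.0, 132.0, 129.0, 121.0, 135.0, 148.0, 148.0, 136.0, 119.0, 104.0, 118.0]
series = [v * (1 + 0.02 * (i // 12)) for i, v in enumerate(pattern * 3)]
parts = calculate_decomposition(series, model=MODEL_MULTIPLICATIVE, frequency=12)
print(parts['seasonal'][:12])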
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stldecompose import decompose


def get_imf(exp_numb, channel):
    samples = 100  # other samples tried: 728, 714, 724(4)
    path = "../data/imf_bpfo/sample{}_imfs.csv".format(samples)
    df = pd.read_csv(path)
    # new_df = df.loc[:, "imf1":"imf4"]
    # new_df.plot()
    # plt.show()

    k = 6  # for k in range(1, 9):
    s = df["imf{}".format(k)].values
    decomp = decompose(s)
    lim = 10000
    seasonality = decomp.seasonal[:lim]
    t = np.linspace(0, 1, lim)
    plt.plot(t, seasonality)

    # Earlier variant working from the raw bearing data:
    # data = get_experiment_bearing_data(exp_numb, channel)
    # k = 850
    # j = 8
    # signal = data[k]
    # imfs = get_imfs(signal)
    # imf = imfs[j]
    # decomp = decompose(imf)
    # pulse = decomp.seasonal
    # plt.plot(pulse[:lim])
    plt.show()
from stldecompose import decompose


def series_decompose(series_df):
    '''
    Description:
        Runs the stldecompose decompose method on a time series from
        create_float_series, using 96 (the number of points per day in
        the database) as the period value.
    Parameters:
        series_df: a pandas DataFrame time series
    Returns:
        a statsmodels object representing the decomposed series
    '''
    decomped_series = decompose(series_df.values, period=96)
    return decomped_series
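# Sketch of calling series_decompose on two weeks of 15-minute data
# (96 points per day); the 'value' column name is an arbitrary choice.
import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=96 * 14, freq='15T')
demo = pd.DataFrame({'value': np.sin(2 * np.pi * np.arange(len(idx)) / 96)}, index=idx)
decomp = series_decompose(demo)
print(decomp.trend[:3])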
import numpy as np
import stldecompose as stl


def _type_std(dataframe, type_count, type_id, length):
    # Collect the seasonal component of every column belonging to this type.
    seasonal_vals = np.zeros((len(dataframe.columns) // type_count, len(dataframe)))
    for i in range(type_id, len(dataframe.columns), type_count):
        seasonal_vals[i // type_count] = stl.decompose(dataframe[i], period=length).seasonal.values
    # Mean (over time points) of the across-column standard deviation.
    type_std = np.mean(np.std(seasonal_vals, axis=0))
    return length, type_std
def main():
    # Load order data from the local Binance database.
    e = exchange.Exchange('../../lib/binance.db')
    start = int(datetime.datetime(2018, 4, 1).timestamp() * 1000)
    end = int(datetime.datetime(2019, 5, 1).timestamp() * 1000)
    # end = int(datetime.datetime(2018, 5, 1).timestamp() * 1000)

    print('Loading order data...')
    number_of_orders, prices = e.get_total_orders_ts(
        'BTCUSDT', 60 * 60 * 1000 * 6, start, end)  # 6-hour buckets
    print('done')

    buy_orders = np.array([b for s, b in number_of_orders])
    sell_orders = np.array([s for s, b in number_of_orders])
    # Replace empty buckets with the mean so the decomposition stays finite.
    buy_orders[buy_orders <= 0] = np.mean(buy_orders)
    sell_orders[sell_orders <= 0] = np.mean(sell_orders)

    returns = np.array(calculate_returns(prices))

    # Seasonally adjust each series: keep trend + residual, drop the season.
    returns_decomp = decompose(returns, period=24 * 7)
    sa_returns = returns_decomp.trend + returns_decomp.resid
    buy_orders_decomp = decompose(buy_orders, period=24 * 7)
    sa_buy_orders = buy_orders_decomp.trend + buy_orders_decomp.resid
    sell_orders_decomp = decompose(sell_orders, period=24 * 7)
    sa_sell_orders = sell_orders_decomp.trend + sell_orders_decomp.resid

    x, corrs = get_correlation_coef(
        buy_orders_decomp.resid - sell_orders_decomp.resid,
        returns_decomp.resid, 28, False)
    plt.bar(x, corrs)
    plt.show()
import matplotlib.pyplot as plt
from stldecompose import decompose


def plot_seasonal(df, tic, OUT_DIR):
    df_close = df[['date', 'close']].copy()
    df_close = df_close.set_index('date')
    decomp = decompose(df_close, period=365)
    fig = decomp.plot()
    fig.set_size_inches(20, 8)
    filename = OUT_DIR + tic + "_decompose.png"
    plt.savefig(filename, dpi=500)
def _type_std_seasonal(dataframe, type_count, length):  # pylint: disable=no-member
    seasonal_vals = np.zeros((len(dataframe.columns), len(dataframe)))
    for i in range(len(dataframe.columns)):
        seasonal_vals[i] = stl.decompose(dataframe[i], period=length).seasonal.values
    # Average, over the types, of the across-column spread of each type's
    # seasonal components.
    type_stds = [
        np.mean(np.std(seasonal_vals[t::type_count], axis=0))
        for t in range(type_count)
    ]
    return length, np.mean(type_stds)
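# A hedged sweep sketch for the helper above: try several candidate season
# lengths on a small frame with integer column labels (two interleaved
# "types") and keep the length whose seasonal components agree best, i.e.
# whose spread is smallest. The DatetimeIndex gives stldecompose an
# inferable frequency.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.date_range('2020-01-01', periods=240, freq='H')
frame = pd.DataFrame(
    {i: np.sin(2 * np.pi * np.arange(240) / 24) + rng.normal(0, 0.1, 240)
     for i in range(4)},
    index=idx)
candidates = [_type_std_seasonal(frame, type_count=2, length=n) for n in (12, 24, 48)]
best_length, _ = min(candidates, key=lambda pair: pair[1])
print(best_length)  # 24 is expected to win on this synthetic frame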
def stl_seasonal_decomposition(self):
    if self.has_validation_error:
        return

    # Decomposition based on STL - Package: stldecompose
    org_unit_group_stl = decompose(self.series, period=12)
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 9))
    self.series.plot(ax=ax1)
    org_unit_group_stl.trend.plot(ax=ax2)
    org_unit_group_stl.seasonal.plot(ax=ax3)
    org_unit_group_stl.resid.plot(ax=ax4)
    ax1.set_title("Vaccine Demand for {} in {}".format(self.vaccine, self.health_facility))
    ax2.set_title("Trend")
    ax3.set_title("Seasonality")
    ax4.set_title("Residuals")
    plt.tight_layout()
    plt.show()

    # Eliminating the seasonal component
    org_unit_group_adjusted = self.series - org_unit_group_stl.seasonal
    plt.figure(figsize=(12, 8))
    org_unit_group_adjusted.plot()
    plt.title("Plot of Vaccine Demand of {} in {} without Seasonal Component".format(
        self.vaccine, self.health_facility))
    plt.show()

    # Getting the seasonal component only.
    # Seasonality gives structure to the data.
    plt.figure(figsize=(12, 8))
    org_unit_group_stl.seasonal.plot()
    plt.title("Plot of Seasonal Component of Vaccine Demand of {} in {}".format(
        self.vaccine, self.health_facility))
    plt.show()

    # Creating a forecast based on STL
    stl_fcast = forecast(org_unit_group_stl, steps=12, fc_func=seasonal_naive, seasonal=True)

    # Plot of the forecast and the original data
    plt.figure(figsize=(12, 8))
    plt.plot(self.series, label='BCG Wastage Rate')
    plt.plot(stl_fcast, label=stl_fcast.columns[0])
    plt.title("Plot of Vaccine Demand of {} in {} Next Year Forecast".format(
        self.vaccine, self.health_facility))
    plt.legend()
    plt.show()
def detrend(self, label=None):
    """Returns the timeseries without its trend component.

    Args:
        label (str, optional): Label for the returned timeseries; defaults
            to this timeseries' own label.

    Returns:
        A FixedIndexTimeseries object.

    Raises:
        None
    """
    dec = decompose(self.timeseries.values, period=self.maxindex)
    detrended = (pandas.Series(dec.resid, index=self.timeseries.index)
                 + pandas.Series(dec.seasonal, index=self.timeseries.index))
    return FixedIndexTimeseries(detrended, mode=self.mode,
                                label=self.label if label is None else label)
def createDataset_uber(look_back):
    scaler = preprocessing.MinMaxScaler()
    df = pd.read_csv(DATASET_STATES_CSV, index_col=0, parse_dates=True, encoding="utf-8")
    df = normalize_df(df, DATA_SCALER())

    # Log-transform (shifted by 1 to keep values positive), then split each
    # column into trend/seasonal/residual components.
    df += 1
    df = df.apply(np.log)
    df2 = df.copy()
    for col in df:
        decomp = decompose(df[col], period=12)
        df[col] = decomp.trend + decomp.resid
        df2[col + '_t'] = decomp.trend
        df2[col + '_s'] = decomp.seasonal
        df2[col + '_r'] = decomp.resid

    res = []
    for col in df:
        x, y, offsets, x_s, y_s = create_dataset_uberSc(
            df[col], df2[col + '_t'], df2[col + '_s'], look_back)

        # Hold out the last FORECASTING_STEPS windows as the test set.
        x_test = np.copy(x[-FORECASTING_STEPS:])
        y_test = np.copy(y[-FORECASTING_STEPS:])
        offsets_test = np.copy(offsets[-FORECASTING_STEPS:])
        x_s_test = np.copy(x_s[-FORECASTING_STEPS:])
        y_s_test = np.copy(y_s[-FORECASTING_STEPS:])
        x = x[:-FORECASTING_STEPS]
        y = y[:-FORECASTING_STEPS]
        x_s = x_s[:-FORECASTING_STEPS]
        y_s = y_s[:-FORECASTING_STEPS]
        offsets = offsets[:-FORECASTING_STEPS]

        # The single most recent remaining window becomes the validation set.
        x_val = np.copy(x[-1:])
        y_val = np.copy(y[-1:])
        x_s_val = np.copy(x_s[-1:])
        y_s_val = np.copy(y_s[-1:])
        offsets_val = np.copy(offsets[-1:])

        x_train = np.copy(x[:-1])
        y_train = np.copy(y[:-1])
        x_s_train = np.copy(x_s[:-1])
        y_s_train = np.copy(y_s[:-1])
        offsets_train = np.copy(offsets[:-1])
        # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.33, random_state=RANDOM_SEED)

        res.append((x_train, x_val, x_test, y_train, y_val, y_test,
                    offsets_train, offsets_val, offsets_test,
                    x_s_train, x_s_val, x_s_test,
                    y_s_train, y_s_val, y_s_test))
    return res
def trend(data, period, yl):
    stl = decompose(data, period=period)
    plt.figure(facecolor='#ffffff')
    plt.gcf().clear()
    plt.plot(stl.trend, 'o-', color='b')
    plt.xticks(rotation=30)
    plt.grid(True, color='#e5e5cc', linestyle='-', linewidth=1)
    plt.xlabel(None)
    plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
    plt.title('Sales Trends', y=1.05, color='#630b0b', fontsize=21)
    plt.tight_layout()
    # img = io.BytesIO()
    # plt.savefig(img, format='png')
    # plot = base64.b64encode(img.getvalue()).decode()
    # return plot
    return plt.show()
import joblib
from statsmodels.tsa.holtwinters import ExponentialSmoothing


def create_model(Date):
    i = totalMeals
    ts = timeseries_df(i, Date)
    X = ts.values

    # STL
    modelSTL, errorSTL = stl(X, ts)
    # ETS
    modelETS, errorETS = ets(X)

    # Compare the holdout errors of ETS and STL and keep the better model.
    print(errorSTL)
    print(errorETS)
    error = min(errorSTL, errorETS)
    if error == errorSTL:
        FinalModel = modelSTL
        FModel = 'STL'
        print("STL")
    else:
        FinalModel = modelETS
        FModel = 'ETS'
        print("ETS")

    # If the STL model wins, refit it on the full series and persist it.
    if FModel == 'STL':
        from stldecompose import decompose, forecast
        globals()['STL%s' % i] = FinalModel
        STL.append(i)
        FinalModel = decompose(ts, period=7)
        joblib.dump(FinalModel, 'STL' + str(i) + '.xml', compress=1)
    # If the ETS model wins, refit the winning variant on the full series.
    elif FModel == 'ETS':
        globals()['ETS%s' % i] = FinalModel
        ETS.append(i)
        if modelETS == 1:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add',
                                              seasonal='add', damped=True).fit(use_boxcox=True)
        if modelETS == 2:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='add',
                                              seasonal='mul', damped=True).fit(use_boxcox=True)
        if modelETS == 3:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul',
                                              seasonal='add', damped=True).fit(use_boxcox=True)
        if modelETS == 4:
            FinalModel = ExponentialSmoothing(X, seasonal_periods=7, trend='mul',
                                              seasonal='mul', damped=True).fit(use_boxcox=True)
        joblib.dump(FinalModel, 'ETS' + str(i) + '.xml', compress=1)
from glob import glob

import pandas as pd
from stldecompose import decompose


def burst_amplitude():
    amplitudes = []
    path = "../data/imf_bpfi/"
    files = glob("{}/*".format(path))
    for sample_k, file in enumerate(files):
        print("processing file {}".format(sample_k + 1))
        df = pd.read_csv(file)
        columns = list(df.columns)
        temp_list = []
        for j, column_name in enumerate(columns):
            # STL-decompose each IMF column and keep its seasonal component.
            imf = list(df[column_name])
            decomp = decompose(imf)
            seasonality = decomp.seasonal
            col = "slt_of_imf{}".format(j + 1)
            temp_list.append((col, seasonality))
        temp_dict = dict(temp_list)
        temp_df = pd.DataFrame(temp_dict)
        temp_df.to_csv("../data/stl_bpfi/sample{}_stl.csv".format(sample_k))
import matplotlib.pyplot as plt


def stl_decompose(series, period=None):
    """
    Decompose a time series using STL (Seasonal and Trend decomposition using Loess).

    series - time series data
    period - (largest) period of the seasonality
    """
    import stldecompose as stl

    result = stl.decompose(series, period=period)
    fig, axis = plt.subplots(4, 1, figsize=(15, 10))
    result.observed.plot(ax=axis[0], title='Observed')
    result.trend.plot(ax=axis[1], title='Trend')
    result.seasonal.plot(ax=axis[2], title='Seasonal')
    result.resid.plot(ax=axis[3], title='Residual')
    plt.tight_layout()
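# Example call for stl_decompose: a five-year monthly series with a yearly
# season plus a linear trend.
import numpy as np
import pandas as pd

idx = pd.date_range('2015-01-01', periods=60, freq='MS')
demo = pd.Series(np.sin(2 * np.pi * np.arange(60) / 12) + np.linspace(0, 2, 60), index=idx)
stl_decompose(demo, period=12)
plt.show()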
def stl(X, ts):
    print("Entering STL")
    from stldecompose import decompose, forecast
    from stldecompose.forecast_funcs import naive
    from sklearn.metrics import mean_squared_error

    # 90/10 train/test split.
    train_size = int(len(X) * 0.90)
    test_size = len(X) - train_size
    train, test = ts[0:train_size], ts[train_size:len(X)]

    decomp = decompose(train, period=7)
    fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)

    # Error calculation on the holdout window.
    y_pred = [row[0] for row in fcast.values]
    y_true = [row[0] for row in test.values]
    Ferror = mean_squared_error(y_true, y_pred)
    print("Leaving STL")
    return decomp, Ferror
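# Sketch of evaluating the STL + naive baseline above on a synthetic weekly
# series; the 'num_orders' column name mirrors callers elsewhere in this file
# and is otherwise an arbitrary choice.
import numpy as np
import pandas as pd

idx = pd.date_range('2018-01-06', periods=140, freq='W-SAT')
demo = pd.DataFrame({'num_orders': 50 + 10 * np.sin(2 * np.pi * np.arange(140) / 7)}, index=idx)
decomp, mse = stl(demo.values, demo)
print('holdout MSE:', mse)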
def season(data, period, yl):
    # v = int(len(data) // (len(data) // 4))
    stl = decompose(data, period=period)
    plt.gcf().clear()
    plt.plot(stl.seasonal, '-', color='b')
    # for i, j in zip(stl.seasonal.index, stl.seasonal.values):
    #     plt.annotate(str(j), xy=(i, j))
    plt.xticks(rotation=30)
    plt.grid(True)
    plt.xlabel(None)
    plt.ylabel(str(yl) + ' Sales Averages (USD)', labelpad=20)
    plt.title('Sales Seasonality', y=1.05, color='#630b0b', fontsize=21)
    plt.tight_layout()
    # img = io.BytesIO()
    # plt.savefig(img, format='png')
    # plot = base64.b64encode(img.getvalue()).decode()
    return plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from stldecompose import decompose


def decomposition_plot(series_data, period):
    """
    Decomposition of the original signal for preliminary analysis.

    Args:
        series_data: pandas Series object
        period: estimated seasonal frequency
    """
    # If the input is not a Series object, convert it to one.
    if not isinstance(series_data, pd.Series):
        series_data = pd.Series(series_data)

    # Naive additive decomposition.
    decomp = seasonal_decompose(series_data.values, model='additive', freq=period)
    decomp.plot()

    # STL decomposition.
    stl = decompose(series_data.values, period=period)
    stl.plot()

    plt.show()
    plt.pause(0.01)
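# Quick look at both decompositions for a synthetic daily series with a
# weekly season (period=7).
import numpy as np

np.random.seed(1)
demo = 20 + 5 * np.sin(2 * np.pi * np.arange(210) / 7) + np.random.normal(0, 1, 210)
decomposition_plot(demo, period=7)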
def decomposeSeries(self, ts, decompType=None):
    decomp = None
    if decompType is None:
        decompType = self.decompType.currentText()

    if decompType == "Additive":
        from statsmodels.tsa.seasonal import seasonal_decompose
        decomp = seasonal_decompose(ts, model="additive", freq=96)
    elif decompType == "Multiplicative":
        from statsmodels.tsa.seasonal import seasonal_decompose
        try:
            decomp = seasonal_decompose(ts, model="multiplicative", freq=96)
        except Exception:
            # Multiplicative decomposition fails on non-positive values.
            decomp = None
    elif decompType == "Loess (STL)":
        from stldecompose import decompose
        decomp = decompose(ts, period=96)
    return decomp
def automation_single_ts_arma_analysis(original_df, smoothed_df, smooth_type,
                                       inclusion, stationarity):
    from statsmodels.tsa.arima_model import ARIMA
    from math import ceil
    import numpy as np
    import pandas as pd
    from functions import goodness_prediction_interval, forecast_pred_int, prediction_error

    if smooth_type == 'normal':
        ts = original_df
    else:
        ts = smoothed_df

    if stationarity:
        # Split the time series dataset into training and testing.
        ts_train = ts[0:ceil(len(ts) * 0.9)]
        ts_test = ts[ceil(len(ts) * 0.9):]

        # Find the best-ordered ARMA model by HQIC.
        best_hqic = np.inf
        best_order = None
        best_mdl = None
        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values, order=(p, d, q)).fit(method='mle', trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except Exception:
                        continue
        # print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        # The .plot_predict function has a problem, so forecast directly.
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])
        # ts_predict = best_mdl.predict(start=ts.index.get_loc(pd.to_datetime(firstdate)),
        #                               end=ts.index.get_loc(pd.to_datetime(lastdate)))

        # Calculate the prediction interval.
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(len(ts_test))
    else:
        # Remove trend and seasonality from the time series.
        from stldecompose import decompose, forecast
        from stldecompose.forecast_funcs import (naive, drift, mean, seasonal_naive)

        # If the series is shorter than 130 points, treat it as weekly data.
        if len(ts) < 130:
            stl = decompose(ts, period=52)
        else:
            if not inclusion:
                stl = decompose(ts, period=251)
            else:
                stl = decompose(ts, period=365)

        # Fit an ARMA model on the residual.
        ts_train = stl.resid[0:ceil(len(stl.resid) * 0.9)]
        ts_test = stl.resid[ceil(len(stl.resid) * 0.9):]
        best_hqic = np.inf
        best_order = None
        best_mdl = None
        rng = range(5)
        for p in rng:
            for d in rng:
                for q in rng:
                    try:
                        tmp_mdl = ARIMA(ts_train.values, order=(p, d, q)).fit(method='mle', trend='nc')
                        tmp_hqic = tmp_mdl.hqic
                        if tmp_hqic < best_hqic:
                            best_hqic = tmp_hqic
                            best_order = (p, d, q)
                            best_mdl = tmp_mdl
                    except Exception:
                        continue
        # print('hqic: {:6.5f} | order: {}'.format(best_hqic, best_order))

        # Prediction on the holdout window.
        firstdate = str(ts_test.index[0])
        lastdate = str(ts_test.index[-1])
        ts_predict = best_mdl.predict(
            start=ts.index.get_loc(pd.to_datetime(firstdate)),
            end=ts.index.get_loc(pd.to_datetime(lastdate)))

        # Add back the trend and seasonality.
        ts_predict = (stl.seasonal.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + stl.trend.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + pd.Series(index=ts_test.index, data=ts_predict))

        # Compute the prediction interval, shifted back by trend + seasonality.
        ts_forecast, std_error, prediction_interval = best_mdl.forecast(len(ts_test))
        difference = (stl.seasonal.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()]
                      + stl.trend.units.loc[ts_test.index[0].to_pydatetime():ts_test.index[-1].to_pydatetime()])

        def f(a):
            return a + difference

        prediction_interval = np.apply_along_axis(f, 0, prediction_interval)

    # Compute the prediction error.
    pe = prediction_error(ts_test.units, ts_forecast, original_df=original_df,
                          smooth_type=smooth_type)

    # Assess the goodness of the prediction interval.
    acc_pi, avg_diff_pi = goodness_prediction_interval(ts_test, prediction_interval)

    # Plot the prediction and prediction intervals:
    # from func_visualisation import plot_prediction
    # plot_prediction(df, prediction, prediction_interval)
    return best_order, pe, acc_pi, avg_diff_pi
def esd(ts, max_anomalies=10, alpha=0.05, hybrid=False):
    """
    Compute the Extreme Studentized Deviate of a time series.

    A Grubbs Test is performed max_anomalies times with the caveat that each
    time the top value is removed. For more details visit
    http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm

    Args:
        ts (list or np.array): The time series on which to compute the ESD.
        max_anomalies (int): The number of times the Grubbs' Test will be
            applied to the ts.
        alpha (float): The significance level.
        hybrid (bool): Use the median and MAD in place of the mean and
            standard deviation (the hybrid variant).

    Returns:
        list of int: The indices of the anomalies in the time series.
    """
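# A hedged sketch of the generalized ESD loop the docstring above describes;
# this is not the library's actual body. Critical values follow the NIST
# formulation (eda35h3); the name esd_sketch is an assumption.
import numpy as np
from scipy.stats import t as t_dist


def esd_sketch(ts, max_anomalies=10, alpha=0.05, hybrid=False):
    series = np.asarray(ts, dtype=float).copy()
    indices = list(range(len(series)))
    n = len(series)
    anomalies = []
    num_outliers = 0
    for k in range(1, max_anomalies + 1):
        if hybrid:
            center = np.median(series)
            spread = np.median(np.abs(series - center))  # MAD
        else:
            center = np.mean(series)
            spread = np.std(series, ddof=1)
        if spread == 0:
            break
        deviations = np.abs(series - center) / spread
        max_idx = int(np.argmax(deviations))
        test_stat = deviations[max_idx]
        # Critical value from the t-distribution.
        p = 1 - alpha / (2 * (n - k + 1))
        t = t_dist.ppf(p, n - k - 1)
        critical = ((n - k) * t) / np.sqrt((n - k - 1 + t ** 2) * (n - k + 1))
        anomalies.append(indices[max_idx])
        if test_stat > critical:
            num_outliers = k
        # Remove the most deviant point and repeat.
        series = np.delete(series, max_idx)
        del indices[max_idx]
    return anomalies[:num_outliers]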
plt.show()

AirP["Residual"] = AirP["Season"] - AirP["Season_ave"]
AirP["Residual"].plot()
plt.show()

seasonal_decompose(AirP["Passengers"], model="additive", freq=12).plot()
plt.show()

seasonal_decompose(np.log(AirP["Passengers"]), model="add").resid.plot()
plt.show()

from stldecompose import decompose
decompose(np.log(AirP["Passengers"]), period=12).plot();

# =============================================================================
# Quarterly beer production in Australia (in megalitres) between
# March 1956 and June 1994
# =============================================================================
AusBeer = pd.read_csv("data/AustralianBeer.csv", sep=";")
AusBeer.head()

AusBeer1 = AusBeer.copy()
AusBeer1.head()

AusBeer1["Quarter"] = pd.DatetimeIndex(AusBeer1["Quarter"])
AusBeer1.set_index("Quarter", inplace=True)
AusBeer1.head()
from stldecompose import decompose


def get_decomposition(insample_data, p):
    """Split a series into its trend, seasonal, and residual components."""
    dec = decompose(insample_data, period=p)
    return dec.trend, dec.seasonal, dec.resid
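# Example: decompose two years of noisy daily data with a yearly period.
import numpy as np

np.random.seed(0)
demo = 0.01 * np.arange(730) + np.random.normal(0, 0.5, 730)
trend, seasonal, resid = get_decomposition(demo, 365)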
def main():
    '''
    Main function that generates the result.
    '''
    # load data
    data = pd.read_csv(args.excep_train, parse_dates=["SHIFT_DATE"])

    # create train and validation sets
    train = data[(data["SHIFT_DATE"] > "2012-12-31")
                 & (data["SHIFT_DATE"] < "2018-01-01")]
    val = data[(data["SHIFT_DATE"] > "2017-12-31")
               & (data["SHIFT_DATE"] < "2019-01-01")]

    # using only a portion of the sites
    train_clean = train[(train["SITE"] == "St Paul's Hospital")
                        | (train["SITE"] == "Mt St Joseph")
                        | (train["SITE"] == "Holy Family")
                        | (train["SITE"] == "SVH Langara")
                        | (train["SITE"] == "Brock Fahrni")
                        | (train["SITE"] == "Youville Residence")]
    train_clean = train_clean[(train_clean["JOB_FAMILY"] == "DC1000")
                              | (train_clean["JOB_FAMILY"] == "DC2A00")
                              | (train_clean["JOB_FAMILY"] == "DC2B00")]
    val_clean = val[(val["SITE"] == "St Paul's Hospital")
                    | (val["SITE"] == "Mt St Joseph")
                    | (val["SITE"] == "Holy Family")
                    | (val["SITE"] == "SVH Langara")
                    | (val["SITE"] == "Brock Fahrni")
                    | (val["SITE"] == "Youville Residence")]
    val_clean = val_clean[(val_clean["JOB_FAMILY"] == "DC1000")
                          | (val_clean["JOB_FAMILY"] == "DC2A00")
                          | (val_clean["JOB_FAMILY"] == "DC2B00")]

    # create training dataframes
    splitting_train = train_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM", "SHIFT_DATE"]).size().reset_index()
    splitting_train = splitting_train.rename({"SHIFT_DATE": "ds", 0: "y"}, axis=1)

    # create validation dataframes
    splitting_val = val_clean.groupby(
        ["JOB_FAMILY", "SITE", "SUB_PROGRAM", "SHIFT_DATE"]).size().reset_index()
    splitting_val = splitting_val.rename({"SHIFT_DATE": "ds", 0: "y"}, axis=1)

    # create timeframe data for prediction
    total_timeframe = pd.DataFrame(
        pd.date_range(start='2013-01-01', end='2017-12-31', freq="D")).rename({0: "ds"}, axis=1)
    timeframe = pd.DataFrame(
        pd.date_range(start='2018-01-01', end='2018-12-31', freq="D")).rename({0: "ds"}, axis=1)

    # unique combinations
    sites = train_clean["SITE"].unique()
    job_families = train_clean["JOB_FAMILY"].unique()
    sub_programs = train_clean["SUB_PROGRAM"].unique()

    # create and store predictions and true results
    models = {}
    split_data = {}
    pred_results_past = {}
    pred_results_future = {}
    true_results = {}
    for i in sites:
        for j in job_families:
            for k in sub_programs:
                temp_data_train = splitting_train[
                    (splitting_train["SITE"] == i)
                    & (splitting_train["JOB_FAMILY"] == j)
                    & (splitting_train["SUB_PROGRAM"] == k)].reset_index()
                temp_data_train = pd.merge(total_timeframe, temp_data_train,
                                           on="ds", how="outer")
                temp_data_train["y"] = temp_data_train["y"].fillna(0)
                temp_data_val = splitting_val[
                    (splitting_val["SITE"] == i)
                    & (splitting_val["JOB_FAMILY"] == j)
                    & (splitting_val["SUB_PROGRAM"] == k)].reset_index(drop=True)
                temp_data_val = pd.merge(timeframe, temp_data_val, on="ds", how="outer")
                temp_data_val["y"] = temp_data_val["y"].fillna(0)
                split_data[(i, j, k)] = temp_data_train
                true_results[(i, j, k)] = temp_data_val
                if temp_data_val["y"].sum() >= 300.0:
                    pred_results_past[(i, j, k)], models[(i, j, k)] = run_prophet(
                        temp_data_train, total_timeframe)
                    pred_results_future[(i, j, k)] = models[(i, j, k)].predict(timeframe)
                    print("Fitting -", i, j, k, ": Done")

    # combine predictions and true results
    combined = {}
    for i in pred_results_future:
        combined[i] = pd.merge(
            true_results[i], pred_results_future[i], on="ds",
            how="outer")[["ds", "y", "yhat", "yhat_lower", "yhat_upper"]]

    # convert to weeks and calculate errors weekly
    weekly = {}
    for i in combined:
        # create week column
        combined[i]["ds"] = combined[i]["ds"] - pd.DateOffset(weekday=0, weeks=1)
        combined[i]["week"] = combined[i]["ds"].dt.week

        # store y, yhat, yhat_lower, yhat_upper
        weekly_y = combined[i].groupby("ds").y.sum().reset_index()
        weekly_yhat = combined[i].groupby("ds").yhat.sum().astype(int).reset_index()
        weekly_yhat_lower = combined[i].groupby("ds").yhat_lower.sum().astype(int).reset_index()
        weekly_yhat_upper = combined[i].groupby("ds").yhat_upper.sum().astype(int).reset_index()

        # replace negative prediction values with 0
        weekly_yhat = weekly_yhat.where(weekly_yhat["yhat"] >= 0, 0)
        weekly_yhat_lower = weekly_yhat_lower.where(weekly_yhat_lower["yhat_lower"] >= 0, 0)
        weekly_yhat_upper = weekly_yhat_upper.where(weekly_yhat_upper["yhat_upper"] >= 0, 0)

        # merge weekly results
        weekly[i] = pd.concat([
            weekly_y, weekly_yhat["yhat"], weekly_yhat_lower["yhat_lower"],
            weekly_yhat_upper["yhat_upper"]
        ], axis=1)

        # create columns "week", "site", "job_family", "sub_program"
        length = weekly[i].shape[0]
        weekly[i]["week"] = weekly[i]["ds"].dt.weekofyear
        weekly[i]["site"] = np.repeat(i[0], length)
        weekly[i]["job_family"] = np.repeat(i[1], length)
        weekly[i]["sub_program"] = np.repeat(i[2], length)

    # model the residuals: STL-decompose the daily Prophet errors, forecast
    # them a year ahead, and fold the weekly residual forecast back into the
    # weekly predictions
    for i in weekly:
        forecasted = pred_results_past[i]
        actual = split_data[i]
        error = actual["y"] - forecasted["yhat"]
        obs = total_timeframe.copy()
        obs["error"] = error
        obs = obs.set_index("ds")
        decomp = decompose(obs, period=365)
        weekly_fcast = forecast(decomp, steps=365, fc_func=drift, seasonal=True)
        weekly_fcast["week"] = weekly_fcast.index - pd.DateOffset(weekday=0, weeks=1)
        weekly_fcast = weekly_fcast.groupby("week").sum()
        resid_fcast = weekly_fcast.reset_index()["drift+seasonal"]
        weekly_yhat = (weekly[i]["yhat"] + resid_fcast).round(0)
        weekly_yhat_lower = (weekly[i]["yhat_lower"] + resid_fcast).round(0)
        weekly_yhat_upper = (weekly[i]["yhat_upper"] + resid_fcast).round(0)
        weekly[i]["yhat"] = weekly_yhat.where(weekly_yhat >= 0, 0)
        weekly[i]["yhat_lower"] = weekly_yhat_lower.where(weekly_yhat_lower >= 0, 0)
        weekly[i]["yhat_upper"] = weekly_yhat_upper.where(weekly_yhat_upper >= 0, 0)

    # create the data/predictions folder if it doesn't exist
    predictions_path = "../data/predictions/"
    if not os.path.exists(predictions_path):
        os.mkdir(predictions_path)

    # export to the "data/predictions/" directory
    total_data = pd.DataFrame()
    for i in weekly:
        total_data = pd.concat([total_data, weekly[i]], axis=0)
    total_data.to_csv(predictions_path + "exception_predictions.csv")
plt.rc('figure', figsize=(14, 6))
plt.rc('font', size=13)

result = seasonal_decompose(data['Adj. Close'], freq=252, model='additive')
result.plot()
plt.show()

from fbprophet import Prophet

# pip install stldecompose
from stldecompose import decompose, forecast

stl = decompose(data['Adj. Close'])
stl.plot()
plt.show()

df.tail()

DF = df[['Adj. Close']].copy()
DF.reset_index(drop=False, inplace=True)
DF.rename(columns={'Date': 'ds', 'Adj. Close': 'y'}, inplace=True)
DF.tail()

# Split the series into the training and test sets:
train_indices = DF.ds.apply(lambda x: x.year) < 2017
X_train = DF.loc[train_indices].dropna()
X_test = DF.loc[~train_indices].reset_index(drop=True)
from stldecompose import decompose


def stl(train_set, test_set, params):
    # Decompose train + test together, then read the last len(test_set)
    # points of trend + seasonal back out as the forecast for the test window.
    complete_set = train_set + test_set
    decomposition = decompose(complete_set, period=365)
    forecast = [(decomposition.seasonal[i] + decomposition.trend[i])
                for i in range(0, len(decomposition.seasonal))]
    return forecast[-len(test_set):], None
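# Hedged usage sketch: because the decomposition above includes the test
# window, this baseline is an in-sample reconstruction rather than a true
# out-of-sample forecast.
import numpy as np

series = list(np.sin(2 * np.pi * np.arange(800) / 365))
preds, _ = stl(series[:730], series[730:], params=None)
print(len(preds))  # 70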
def stl(k):
    from stldecompose import decompose, forecast
    from stldecompose.forecast_funcs import naive
    from sklearn.metrics import mean_squared_error

    ar = new_tab.loc[(k)].values
    a = [ar[i][0] for i in range(len(ar))]

    def timeseries_df():
        index = pd.date_range(start="01-01-2017", periods=len(a), freq='W-SAT')
        ts = pd.DataFrame(a, index=index, columns=['num_orders'])
        ts['num_orders'] = a
        return ts

    ts = timeseries_df()
    X = ts.values

    # 60/40 train/test split.
    train_size = int(len(X) * 0.60)
    test_size = len(X) - train_size
    train, test = ts[0:train_size], ts[train_size:len(X)]

    trend = ['add', 'add', 'mul', 'mul']
    seasonal = ['add', 'mul', 'add', 'mul']

    decomp = decompose(train, period=7)
    fcast = forecast(decomp, steps=test_size, fc_func=naive, seasonal=True)

    y_pred = [row[0] for row in fcast.values]
    y_true = [row[0] for row in test.values]
    Ferror = mean_squared_error(y_true, y_pred)
    return decomp, Ferror, test_size
tuning_conf = {
    'iterations': 2,
    'cv': 5,
    'scoring': 'r2',
}
m = sensingbee.ml_modeling.Model(estimator, tuning_conf).fit(X, y.loc[X.index])
print('R² = ', m.base_estimator.best_score_)
m.feature_importances_

# # Temporal (STL) features prototyping

import pandas as pd
from stldecompose import decompose

# Global STL
ts = y.groupby('Timestamp').median()
ts.index = pd.to_datetime(ts.index)
stl = decompose(ts, period=7)
Xt = pd.DataFrame()
Xt['NO2_trend'] = stl.trend['Value']
Xt['NO2_seasonal'] = stl.seasonal['Value']
Xt['NO2_diff'] = ts.diff().fillna(0)['Value']
Xt.index = X.index.get_level_values('Timestamp').unique()

# Per-sensor STL
idx = pd.IndexSlice
for s in X.index.get_level_values('Sensor Name').unique():
    x = X.loc[idx[s, :], 'NO2']
    ts = x.reset_index('Sensor Name', drop=True)
    ts.index = pd.to_datetime(ts.index)
    print(ts.shape, s)
    stl = decompose(ts, period=7)
    X.loc[x.index, 'NO2_trend'] = stl.trend
    X.loc[x.index, 'NO2_seasonal'] = stl.seasonal
#
# # The Data
#
# The [messages-per-hour] dataset contains the number of messages in each hour.
# There is seasonality over working hours and working days (MON-FRI).
# At '2019-07-28 15:00:00' there is an anomaly (too many messages).
# Depending on the value of alpha (for the moving STD), the anomaly can fall
# inside or outside the sleeve.
#
import pandas as pd
from statsmodels.tsa.seasonal import DecomposeResult
from stldecompose import decompose

df = pd.read_csv('data/messages-per-hour.csv', names=['Hour', 'Count'], parse_dates=True)
df['Hour'] = pd.to_datetime(df['Hour'])
df = df.set_index('Hour')

# Break the signal down into its STL parts
stl: DecomposeResult = decompose(df, period=24 * 7, lo_frac=0.7)
original = stl.observed
trend = stl.trend
seasonality = stl.seasonal
residual = stl.resid
sleeve_center = trend + seasonality

# Fixed-height sleeve
fixed_std = residual.std()
df['upper_bound'] = sleeve_center + fixed_std * 3
df['lower_bound'] = sleeve_center - fixed_std * 3
plot_draw.draw(df, 'df-fixed-std')

# Variable-height sleeve (MSTD with alpha=0.05)
moving_std = residual.ewm(alpha=0.05, min_periods=20, adjust=False).std()
df['upper_bound'] = sleeve_center + moving_std * 3
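# Hypothetical continuation (the source fragment ends above): complete the
# moving-std sleeve by mirroring the fixed-height version. plot_draw.draw and
# the 'df-moving-std' name are assumptions, not from the source.
df['lower_bound'] = sleeve_center - moving_std * 3
plot_draw.draw(df, 'df-moving-std')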