def main():
    Na, Nk = 600, 600
    with open('{training_data}.pickle'.format(training_data=args.training_data),
              'rb') as training_data:
        x = pickle.load(training_data)[:, ::2]
    x = standardization(x)
    residual_x = np.zeros((Na * Nk, x.shape[1] // 2))
    # deseasonalize each series: residual = series - trend, minus seasonality when found
    for i in range(0, Na * Nk):
        seasons, trend = fit_seasons(x[i, :])
        if seasons is None:
            residual = x[i, :] - trend
            residual_x[i, :] = residual
        else:
            residual_x[i, :] = adjust_seasons(x[i, :], seasons=seasons) - trend
    with open('{out}.pickle'.format(out=args.out), 'wb') as residual_x_pickle:
        pickle.dump(residual_x, residual_x_pickle, protocol=pickle.HIGHEST_PROTOCOL)
    res_mean = np.mean(residual_x, axis=1)
    with open('{out}_mean.pickle'.format(out=args.out), 'wb') as residual_x_mean_pickle:
        pickle.dump(res_mean, residual_x_mean_pickle, protocol=pickle.HIGHEST_PROTOCOL)
    res_var = np.var(residual_x, ddof=1, axis=1)
    with open('{out}_var.pickle'.format(out=args.out), 'wb') as residual_x_var_pickle:
        pickle.dump(res_var, residual_x_var_pickle, protocol=pickle.HIGHEST_PROTOCOL)
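A minimal command-line sketch for main() above. The original script's setup is not shown here, so the argparse flags below are assumptions that simply mirror the args.training_data and args.out attributes main() reads, and standardization() is assumed to be defined elsewhere in the same module.

import argparse
import pickle

import numpy as np
from seasonal import fit_seasons, adjust_seasons

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Compute per-series seasonal residuals from a pickled matrix.')
    parser.add_argument('--training_data', required=True,
                        help='basename (without .pickle) of the input matrix')
    parser.add_argument('--out', required=True,
                        help='basename for the residual/mean/variance output pickles')
    args = parser.parse_args()  # main() reads this module-level name
    main()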
def madseason(t, minW, maxW, mad_pwr):
    # seasonal decomposition by Season module
    seasons, trend = fit_seasons(t)
    # adjusted season
    adjusted = adjust_seasons(t, seasons=seasons)
    if adjusted is not None:
        # Residuals
        residual = adjusted - trend
        # rs = residual.copy()
        # Seasons
        seasons = t - adjusted
        # Trend time series
        trend = pd.Series(trend, index=adjusted.index)
        # Cleaner
        cleaned = dbl_mad_clnr(residual, minW, maxW, mad_pwr)
        # Reconstructed time series
        timeseries = trend + seasons + cleaned
        return timeseries
    else:
        return None
def ano_points(data_col, period=7, trend_type="median"):
    remainder = []
    indice = []
    NUM_DAYS = period * 30
    # detrend and deseasonalize
    print("len(data_col) =", len(data_col))
    for i in range(period * 30, len(data_col)):
        if i >= NUM_DAYS:
            d = data_col[(i - NUM_DAYS + 1):(i + 1)]
        else:
            d = data_col[:(i + 1)]
        # d = data_col[:(i + 1)]
        seasons, trend = fit_seasons(d, trend=trend_type, period=period)
        if seasons is None:
            # print("none at", i)
            seasons = [0] * period
        adjusted = adjust_seasons(d, seasons=seasons)
        # print("seasons", seasons)
        # print("adjusted", adjusted)
        # print("trend", trend)
        residual = adjusted - trend
        # flag = True
        # previous_season = seasons
        # season_.append(np.mean(seasons.tolist()))
        remainder.append(residual[len(residual) - 1])
        indice.append(i)
        print("i =", i)
    remainder = [round(elem, 1) for elem in remainder]
    q75, q25 = np.percentile(remainder, [75, 25])
    IQR = q75 - q25
    low_threshold = q25 - IQR * 1.5
    high_threshold = q75 + IQR * 1.5
    outliers = [0] * len(data_col)
    for i in range(len(remainder)):
        if remainder[i] > high_threshold or remainder[i] < low_threshold:
            outliers[indice[i]] = 1
            # print(indice[i])
    # season_ = list(np.array(season_).flat)
    # print(season_)
    # print(len(season_))
    return outliers
def _remove_seasonality(self, series, likely_period=None):
    from seasonal import fit_seasons, adjust_seasons
    # detrend and deseasonalize
    seasons, trend = fit_seasons(series, period=likely_period)
    adjusted = adjust_seasons(series, seasons=seasons, period=likely_period)
    if adjusted is None:
        return numpy.nan
    return adjusted[-1]
def fillerSeason(ts):
    tsC = ts.copy()
    filled = tsC.interpolate().fillna(0)
    # seasonal decomposition by Season module
    seasons, trend = fit_seasons(filled)
    # adjusted season
    adjusted = adjust_seasons(filled, seasons=seasons)
    if adjusted is not None:
        # Residuals
        residual = adjusted - trend
        rs = residual.copy()
        rsTS = pd.Series(rs, index=adjusted.index)
        # Seasons
        seasonsTS = filled - adjusted
        # Trend time series
        trendTS = pd.Series(trend, index=adjusted.index)
        stdseason = rsTS.groupby([rsTS.index.month, rsTS.index.day]).median()
        # Create fake years
        reshape = np.tile(stdseason, 3)
        reindex = np.tile(stdseason.index, 3)
        t = pd.Series(reshape, reindex)
        # TODO: decide which filter is best
        # Smooth by Savitzky-Golay
        # tSV = savgol_filter(t, 5, 2)  # TODO: change to parametric
        #
        # Smooth by boxcar
        tSV = t.rolling(5, win_type='bartlett', center=True).mean()  # parzen
        tsv = tSV[stdseason.count():2 * stdseason.count()]
        ps = pd.Series(tsv, stdseason.index)
        nanlist = tsC[tsC.isnull()]
        for index, value in nanlist.items():
            nanlist.loc[index] = (stdseason.loc[index.month, index.day]
                                  + trendTS.loc[index]
                                  + seasonsTS.loc[index])
        tsC.update(nanlist)
    return tsC
def anomaly(data_col, period=7, trend_type="median"):
    remainder = []
    indice = []
    NUM_DAYS = 60
    # flag = False
    # previous_season = None
    # season_ = []
    # detrend and deseasonalize
    for i in range(period * 2, len(data_col)):
        if i >= NUM_DAYS:
            d = data_col[(i - NUM_DAYS + 1):(i + 1)]
        else:
            d = data_col[:(i + 1)]
        # d = data_col[:(i + 1)]
        seasons, trend = fit_seasons(d, trend=trend_type, period=period, min_ev=0.05)
        if seasons is None:
            seasons = [0] * period
        adjusted = adjust_seasons(d, seasons=seasons)
        residual = adjusted - trend
        remainder.append(residual[len(residual) - 1])
        indice.append(i)
    remainder = [round(elem, 1) for elem in remainder]
    q75, q25 = np.percentile(remainder, [75, 25])
    IQR = q75 - q25
    low_threshold = q25 - IQR * 1.5
    high_threshold = q75 + IQR * 1.5
    outliers = [0] * len(data_col)
    for i in range(len(remainder)):
        if remainder[i] > high_threshold or remainder[i] < low_threshold:
            outliers[indice[i]] = 1
    return outliers
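A small self-contained illustration of the Tukey/IQR fence that ano_points() and anomaly() above apply to their rolling residuals; the residual values here are made up purely for illustration.

import numpy as np

residuals = [0.1, -0.2, 0.3, 0.0, -0.1, 0.2, 5.0, 0.1, -0.3, 0.2]
q75, q25 = np.percentile(residuals, [75, 25])
iqr = q75 - q25
low, high = q25 - 1.5 * iqr, q75 + 1.5 * iqr
# flag anything outside the fence, mirroring the 0/1 outlier lists above
flags = [1 if (r > high or r < low) else 0 for r in residuals]
print(flags)  # the 5.0 entry falls outside the fence and is flagged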
def S_H_ESD(s, period=None, alpha=0.025, hybrid=True):
    seasons, trend = fit_seasons(s, period=period)
    adjusted = adjust_seasons(s, seasons=seasons)
    if adjusted is not None:
        residual = adjusted - trend
    else:
        residual = s - trend
    max_out = int(len(residual) / 2 - 1)
    outliers = generalizedESD(residual, maxOLs=max_out, alpha=alpha, hybrid=hybrid)[1]
    return (seasons, trend, residual, outliers)
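A minimal usage sketch for S_H_ESD() above, assuming fit_seasons/adjust_seasons come from the seasonal package and that generalizedESD (with its hybrid flag) is defined elsewhere in this codebase; the injected spike and parameter values are illustrative only.

import math
import numpy as np

# trended sine wave with one artificial anomaly injected
s = np.array([10 * math.sin(i * 2 * math.pi / 25) + i * i / 100.0 for i in range(200)])
s[120] += 40
seasons, trend, residual, outliers = S_H_ESD(s, period=25, alpha=0.025, hybrid=True)
print("flagged indices:", outliers)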
def SeasonalDecompose(self):
    self.seasons, self.tendency, self.window = fit_seasons(
        self.ts, trend=self.trend, period=self.period, ptimes=self.yearfac,
        splineseason=self.prefilterseason, forceseason=self.forceseason,
        kernel=self.kernel)
    self.detrended = self.ts - self.tendency
    if self.seasons is None:
        self.adjusted = self.residual = self.detrendednoise = self.trendseasonal = None
    else:
        self.adjusted = adjust_seasons(self.ts, seasons=self.seasons,
                                       period=self.period)
        self.residual = self.adjusted - self.tendency
        self.fullseasons = self.ts - self.tendency - self.residual
def fit(self, period=7, alpha=0.025):
    """
    First deseasonalize the data using holt-winters, then fit using an extreme
    z-score with the t value calculated from Grubbs' test.

    self.seasons: the seasonal period as a short time-series
    self.trend: the trend extracted by the holt-winters seasonal decomposition
    self.adjusted: the time-series after seasonality is extracted
    self.residual: the time-series after trend and seasonality are extracted
    self.outliers: a time-series of Boolean values where anomalies are detected
    self.indices: the indices of True anomaly values in self.outliers
    """
    self.alpha = alpha
    data = self.series if self.detrended is None else self.detrended
    if period is not None:
        trend_method = 'spline'
        self.seasons, trend = seasonal.fit_seasons(
            data, periodogram_thresh=0.5, trend=trend_method, period=period)
        self.trend = pd.Series(trend, index=self.series.index)
        if self.seasons is None:
            raise ValueError(
                'period {} seasonality could not be extracted from the data'
                .format(period))
        self.adjusted = seasonal.adjust_seasons(
            data, seasons=self.seasons, trend=trend_method)
        self.residual = self.adjusted - self.trend
    self.residual = data if self.residual is None else self.residual
    self.outliers = []
    self.grubbs(self.residual.copy().astype(float).values, self.grubbs_min_index_g)
    self.grubbs(self.residual.copy().astype(float).values, self.grubbs_max_index_g)
    self.indices = np.array(self.outliers)
    self.anomaly = np.zeros(data.size, dtype=bool)
    self.anomaly[self.outliers] = True
    return self
def test_trend_spline():
    adjusted = adjust_seasons(DATA, trend="spline")
    assert adjusted.std() < DATA.std()
def test_trend_seasons():
    adjusted = adjust_seasons(DATA, trend="line", seasons=SEASONS)
    assert adjusted.std() < DATA.std()
def test_trend_period():
    adjusted = adjust_seasons(DATA, trend="line", period=PERIOD)
    assert adjusted.std() < DATA.std()
def test_explicit_trend():
    trend = fit_trend(DATA, kind="line")
    adjusted = adjust_seasons(DATA, trend=trend)
    assert adjusted.std() < DATA.std()
def test_auto():
    adjusted = adjust_seasons(DATA)
    assert adjusted.std() < DATA.std()
def seasonal_decomp(input_data, figure=True):
    '''
    input_data: np.ndarray or pd.Series
    output:
        trend: trend sequence
        residual: residual sequence
        seasonal: seasonal component, same length as the input
        seasonal_period: period of the seasonal series
    ---
    adjusted = input_ts - seasonal
    trend = a moving average
    residual = adjusted - trend
    input_ts = adjusted + seasonal = residual + trend + seasonal
    '''
    input_tseries = np.array(input_data)
    seasons, trend = pkg_seasonal.fit_seasons(input_tseries)
    adjusted = pkg_seasonal.adjust_seasons(input_tseries, seasons=seasons)
    residual = adjusted - trend
    print('$$ period of seasonal data =', seasons.size)

    # tile the one-period seasons array so that its length matches input_tseries
    eseas = seasons
    while eseas.size + seasons.size <= input_tseries.size:
        eseas = np.append(eseas, seasons)
    for i in range(seasons.size):
        if eseas.size < input_tseries.size:
            eseas = np.append(eseas, seasons[i])
    if eseas.size != input_tseries.size:
        print('!! bug: seasonal data length {} not the same as input {}'.format(
            eseas.size, input_tseries.size))

    # sanity check: the decomposition should reconstruct the input exactly
    recon = trend + residual + eseas
    diff = np.abs(recon - input_tseries)
    min_error_th = 1E-12
    if np.max(diff) > min_error_th:
        print('!! bug: the reconstruction error should be almost zero, but max={}'.format(
            np.max(diff)))

    if figure:
        rcParams['figure.figsize'] = 15, 7
        plt.figure()
        plt.grid()
        plt.plot(input_tseries, label='input', color='blue')
        plt.plot(trend, label='trend', color='red')
        plt.plot(residual, label='residual', color='black')
        # plt.plot(adjusted, label='adjusted', color='green')
        plt.plot(eseas, label='seasonal', color='green')
        plt.legend(loc='best')

    if isinstance(input_data, pd.Series):
        print('$$ output type conversion to {}'.format(type(input_data)))
        trend = pd.Series(trend, index=input_data.index, name='Trend')
        residual = pd.Series(residual, index=input_data.index, name='Residual')
        eseas = pd.Series(eseas, index=input_data.index, name='Seasonal')

    return trend, residual, eseas, seasons.size
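A minimal usage sketch for seasonal_decomp() above, assuming its module-level dependencies (numpy as np, pandas as pd, matplotlib.pyplot as plt, rcParams, and the seasonal package imported as pkg_seasonal) are in place; the synthetic series reuses the trended sine wave from the example further below and is illustrative only.

import math
import pandas as pd

demo = pd.Series(
    [10 * math.sin(i * 2 * math.pi / 25) + i * i / 100.0 for i in range(100)],
    index=pd.date_range('2020-01-01', periods=100, freq='D'))
trend, residual, seasonal_ts, period = seasonal_decomp(demo, figure=False)
print('estimated seasonal period:', period)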
s = []
t_s = []
j = 0
for i in df_cut['temperature_station_x']:
    if str(i) != 'nan':
        s.append(i)
        t_s.append(df.index[j])
    j = j + 1

a = np.array(s)
seasons, trend = fit_seasons(a)
adjusted = adjust_seasons(a, seasons=seasons)
residual = adjusted - trend

# check anomalies position in time
b = residual > np.std(residual) * alpha
j = 0
p_t = []
for i in b:
    if i == True:
        p_t.append((s[j], t_s[j]))
    j = j + 1
import math
import numpy as np
from seasonal import fit_seasons, adjust_seasons
import matplotlib.pyplot as plt

# make a trended sine wave
s = [10 * math.sin(i * 2 * math.pi / 25) + i * i / 100.0 for i in range(100)]

# detrend and deseasonalize
seasons, trend = fit_seasons(s)
adjusted = adjust_seasons(s, seasons=seasons)
residual = adjusted - trend

# visualize results
plt.figure()
plt.plot(s, label='data')
plt.plot(trend, label='trend')
plt.plot(seasons, label='season_period')
plt.plot(residual, label='residual')
plt.legend(loc='upper left')
plt.show()

# how about with some noise?
noisy = s + np.random.normal(0, 5, len(s))
seasons, trend = fit_seasons(noisy)
adjusted = adjust_seasons(noisy, seasons=seasons)
residual = adjusted - trend

plt.figure()
plt.plot(noisy, label='noisy')
plt.plot(noisy - residual, label='trend+season')
plt.legend(loc='upper left')
plt.show()
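Following the example above, a short sketch (assuming the noisy series and the seasons/trend/residual fitted from it are still in scope) that rebuilds the full-length seasonal component by tiling the one-period seasons array and checks the reconstruction identity data = trend + seasonal + residual used elsewhere in this file.

# tile the one-period seasons array out to the full series length
full_season = np.resize(seasons, len(noisy))
# the decomposition should reconstruct the input up to floating-point noise
recon_error = np.max(np.abs((trend + full_season + residual) - noisy))
print('max reconstruction error:', recon_error)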
def test_period():
    adjusted = adjust_seasons(DATA, period=PERIOD)
    assert adjusted.std() < DATA.std()
    adjusted = adjust_seasons(DATA, period=PERIOD // 2)  # no seasonality
    assert adjusted is None
def test_seasons():
    adjusted = adjust_seasons(DATA, seasons=SEASONS)
    assert adjusted.std() < DATA.std()
# Excerpt from a diurnal-cycle removal routine: the enclosing function signature
# and the `if` header paired with the `else` below are not part of this snippet.
        data = input_var.data
        nt, ny, nx = data.shape
        data = data.reshape(nt, ny * nx)
        tmax, ngrid = data.shape
        diur = np.zeros((8, ngrid))  # 8 time steps per day (3-hourly output)
        for hr in np.arange(8):
            idx = np.arange(hr, tmax, 8)
            diur[hr, :] = data[idx].mean(axis=0)
        day_cyc = np.reshape(diur, (8, ny, nx))
        diur = np.tile(diur, [int(len(input_var.coord('t').points) / 8.), 1])  # 24 hours in 37 days
        data = (data - diur).reshape(nt, ny, nx)
    else:
        data = input_var.values
        diur = np.zeros((24))
        for hr in np.arange(24):
            idx = np.arange(hr, len(data), 24)
            diur[hr] = data[idx].mean(axis=0)
        day_cyc = np.reshape(diur, (24))
        diur = np.tile(diur, [int(len(data) / 24.), 1])
    return data, day_cyc

from seasonal import fit_seasons, adjust_seasons

seasons, trend = fit_seasons(full_srs['Ts'].data[:, lon_dict['AWS14'], lat_dict['AWS14']])
adjusted = adjust_seasons(full_srs['Ts'][:, lon_dict['AWS14'], lat_dict['AWS14']].data,
                          seasons=seasons)
seas = full_srs['Ts'][:, lon_dict['AWS14'], lat_dict['AWS14']].data - trend
residual = adjusted - trend