Ejemplo n.º 1
0
def main():
    """Remove trend and seasonality from each training series and pickle the results.

    Loads ``<args.training_data>.pickle``, keeps every second column,
    standardizes the data and, for each of the first ``Na * Nk`` rows,
    subtracts the fitted trend (after removing the seasonal component when
    one is found).

    Three pickles are written:
        ``<args.out>.pickle``       residual matrix,
        ``<args.out>_mean.pickle``  per-row mean of the residuals,
        ``<args.out>_var.pickle``   per-row sample variance (``ddof=1``).
    """
    Na, Nk = 600, 600
    n_series = Na * Nk

    source_path = '{training_data}.pickle'.format(
        training_data=args.training_data)
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only point this at training data from a trusted source.
    with open(source_path, 'rb') as source_file:
        x = pickle.load(source_file)[:, ::2]
    x = standardization(x)

    # Each residual row has as many samples as the row it was computed from,
    # so the buffer must be x.shape[1] wide.  The previous width of
    # x.shape[1] // 2 made the row assignment below fail to broadcast.
    residual_x = np.zeros((n_series, x.shape[1]))
    for row in range(n_series):
        series = x[row, :]
        seasons, trend = fit_seasons(series)
        if seasons is not None:
            series = adjust_seasons(series, seasons=seasons)
        residual_x[row, :] = series - trend

    # (file-name template, payload factory); the factories are evaluated
    # lazily so each statistic is computed right before it is written.
    outputs = (
        ('{out}.pickle', lambda: residual_x),
        ('{out}_mean.pickle', lambda: np.mean(residual_x, axis=1)),
        ('{out}_var.pickle', lambda: np.var(residual_x, ddof=1, axis=1)),
    )
    for template, make_payload in outputs:
        payload = make_payload()
        with open(template.format(out=args.out), 'wb') as sink:
            pickle.dump(payload, sink, protocol=pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 2
0
def madseason(t, minW, maxW, mad_pwr):
    """Rebuild ``t`` from its trend, seasons and a cleaned residual.

    The series is decomposed with ``fit_seasons`` / ``adjust_seasons``; the
    residual (deseasonalized series minus trend) is passed through
    ``dbl_mad_clnr`` together with ``minW``, ``maxW`` and ``mad_pwr``, and
    the three components are summed back together.

    Returns:
        The reconstructed series, or ``None`` when ``adjust_seasons``
        returns ``None``.
    """
    fitted_seasons, fitted_trend = fit_seasons(t)
    deseasonalized = adjust_seasons(t, seasons=fitted_seasons)

    # Nothing to rebuild when the seasonal adjustment failed.
    if deseasonalized is None:
        return None

    residual = deseasonalized - fitted_trend
    seasonal_part = t - deseasonalized
    # Re-index the raw trend so it lines up with the deseasonalized series.
    trend_series = pd.Series(fitted_trend, index=deseasonalized.index)
    cleaned_residual = dbl_mad_clnr(residual, minW, maxW, mad_pwr)

    return trend_series + seasonal_part + cleaned_residual
Ejemplo n.º 3
0
def ano_points(data_col, period=7, trend_type="median"):
    """Flag outliers using residuals of a rolling seasonal decomposition.

    For every index ``i`` from ``period * 30`` onward, the trailing window of
    ``period * 30`` observations ending at ``i`` is decomposed with
    ``fit_seasons`` / ``adjust_seasons`` and the residual of the newest
    observation is recorded (rounded to one decimal).  Residuals outside the
    ``1.5 * IQR`` fences of all recorded residuals are flagged.

    Args:
        data_col: Sliceable sequence of observations.
        period: Season length passed to ``fit_seasons``; the window is 30
            periods long.
        trend_type: Forwarded to ``fit_seasons`` as ``trend``.

    Returns:
        A list of ints as long as ``data_col``: 1 where an outlier was
        flagged, 0 elsewhere.  Indices below ``period * 30`` are never
        evaluated and stay 0; an input shorter than one window therefore
        yields all zeros.
    """
    window = period * 30
    n_obs = len(data_col)
    outliers = [0] * n_obs

    # Progress output is kept for callers that rely on it; the format()
    # form prints the same text on Python 2 and Python 3.
    print("len(data_col) =  {}".format(n_obs))

    remainder = []
    indice = []
    for i in range(window, n_obs):
        # i >= window always holds here, so the trailing window is complete.
        d = data_col[i - window + 1:i + 1]
        seasons, trend = fit_seasons(d, trend=trend_type, period=period)

        if seasons is None:
            # No seasonality found: adjust with an all-zero season instead.
            seasons = [0] * period

        adjusted = adjust_seasons(d, seasons=seasons)
        residual = adjusted - trend

        remainder.append(round(residual[len(residual) - 1], 1))
        indice.append(i)
        print("i = {}".format(i))

    if not remainder:
        # Fewer observations than one window: nothing to score, and
        # np.percentile cannot handle an empty sample.
        return outliers

    q75, q25 = np.percentile(remainder, [75, 25])
    iqr = q75 - q25
    low_threshold = q25 - iqr * 1.5
    high_threshold = q75 + iqr * 1.5

    for idx, value in zip(indice, remainder):
        if value > high_threshold or value < low_threshold:
            outliers[idx] = 1

    return outliers
Ejemplo n.º 4
0
    def _remove_seasonality(self, series, likely_period=None):
        """Return the last value of ``series`` after seasonal adjustment.

        ``likely_period`` is forwarded as ``period`` to both ``fit_seasons``
        and ``adjust_seasons``.  ``numpy.nan`` is returned when
        ``adjust_seasons`` yields ``None``.
        """
        from seasonal import fit_seasons, adjust_seasons

        # The fitted trend is not needed here; only the seasons are reused.
        fitted_seasons, _unused_trend = fit_seasons(series,
                                                    period=likely_period)
        deseasonalized = adjust_seasons(series,
                                        seasons=fitted_seasons,
                                        period=likely_period)

        return numpy.nan if deseasonalized is None else deseasonalized[-1]
Ejemplo n.º 5
0
    def _remove_seasonality(self, series, likely_period=None):
        """Return the last value of ``series`` after seasonal adjustment.

        ``likely_period`` is forwarded as ``period`` to both ``fit_seasons``
        and ``adjust_seasons``.  ``numpy.nan`` is returned when
        ``adjust_seasons`` yields ``None``.
        """
        from seasonal import fit_seasons, adjust_seasons

        # The fitted trend is not needed here; only the seasons are reused.
        fitted_seasons, _unused_trend = fit_seasons(series,
                                                    period=likely_period)
        deseasonalized = adjust_seasons(series,
                                        seasons=fitted_seasons,
                                        period=likely_period)

        return numpy.nan if deseasonalized is None else deseasonalized[-1]
Ejemplo n.º 6
0
def fillerSeason(ts):
    """Fill the missing values of ``ts`` from its seasonal decomposition.

    A working copy of ``ts`` is interpolated (remaining NaNs set to 0) and
    decomposed with ``fit_seasons`` / ``adjust_seasons``.  Every value that is
    null in the original series is then replaced by

        median residual for that (month, day) + trend + seasonal component

    evaluated at the same timestamp.

    Args:
        ts: pandas Series; its index must expose ``.month`` and ``.day``
            (the code groups by ``index.month`` / ``index.day``).

    Returns:
        A copy of ``ts`` with the gaps filled, or ``None`` when
        ``adjust_seasons`` returns ``None`` (previously this case raised
        an unbound-variable error).
    """
    tsC = ts.copy()
    filled = tsC.interpolate().fillna(0)

    # Seasonal decomposition by the seasonal module.
    seasons, trend = fit_seasons(filled)
    adjusted = adjust_seasons(filled, seasons=seasons)

    if adjusted is None:
        # Without a seasonal adjustment there are no components to fill from.
        return None

    # Decomposition components, all aligned on the index of ``adjusted``.
    rsTS = pd.Series(adjusted - trend, index=adjusted.index)
    seasonsTS = filled - adjusted
    trendTS = pd.Series(trend, index=adjusted.index)

    # Typical residual for each calendar day.
    stdseason = rsTS.groupby([rsTS.index.month, rsTS.index.day]).median()

    # TODO decide whether a smoothed day-of-year profile (Savitzky-Golay or a
    # rolling Bartlett window) should replace ``stdseason`` below.  The
    # previous version computed such a profile but never used it, so that
    # dead computation has been dropped.

    nanlist = tsC[tsC.isnull()]
    for index in nanlist.index:
        nanlist.loc[index] = (stdseason.loc[index.month, index.day]
                              + trendTS.loc[index]
                              + seasonsTS.loc[index])

    tsC.update(nanlist)

    return tsC
def anomaly(data_col, period=7, trend_type="median", num_days=60):
    """Flag outliers using residuals of a rolling seasonal decomposition.

    For every index ``i`` from ``period * 2`` onward, the trailing window of
    at most ``num_days`` observations ending at ``i`` is decomposed with
    ``fit_seasons`` (``min_ev=0.05``) / ``adjust_seasons`` and the residual
    of the newest observation is recorded (rounded to one decimal).
    Residuals outside the ``1.5 * IQR`` fences of all recorded residuals are
    flagged.

    Args:
        data_col: Sliceable sequence of observations.
        period: Season length passed to ``fit_seasons``.
        trend_type: Forwarded to ``fit_seasons`` as ``trend``.
        num_days: Maximum window length; defaults to the previously
            hard-coded 60.

    Returns:
        A list of ints as long as ``data_col``: 1 where an outlier was
        flagged, 0 elsewhere.  Indices below ``period * 2`` are never
        evaluated and stay 0; an input that short yields all zeros.
    """
    n_obs = len(data_col)
    outliers = [0] * n_obs

    remainder = []
    indice = []
    for i in range(period * 2, n_obs):
        # Trailing window ending at i; the whole prefix while it is still
        # shorter than num_days.
        start = max(0, i - num_days + 1)
        d = data_col[start:i + 1]
        seasons, trend = fit_seasons(d,
                                     trend=trend_type,
                                     period=period,
                                     min_ev=0.05)

        if seasons is None:
            # No seasonality found: adjust with an all-zero season instead.
            seasons = [0] * period

        adjusted = adjust_seasons(d, seasons=seasons)
        residual = adjusted - trend

        remainder.append(round(residual[len(residual) - 1], 1))
        indice.append(i)

    if not remainder:
        # Nothing was scored, and np.percentile cannot handle an empty sample.
        return outliers

    q75, q25 = np.percentile(remainder, [75, 25])
    iqr = q75 - q25
    low_threshold = q25 - iqr * 1.5
    high_threshold = q75 + iqr * 1.5

    for idx, value in zip(indice, remainder):
        if value > high_threshold or value < low_threshold:
            outliers[idx] = 1

    return outliers
Ejemplo n.º 8
0
def S_H_ESD(s, period=None, alpha=0.025, hybrid=True):
    """Decompose ``s`` and run ``generalizedESD`` on the residual.

    The residual is the seasonally adjusted series minus the trend, or the
    raw series minus the trend when ``adjust_seasons`` returns ``None``.
    At most ``int(len(residual) / 2 - 1)`` outliers are requested.

    Returns:
        Tuple ``(seasons, trend, residual, outliers)`` where ``outliers`` is
        element ``[1]`` of the ``generalizedESD`` result.
    """
    seasons, trend = fit_seasons(s, period=period)
    deseasonalized = adjust_seasons(s, seasons=seasons)

    # Fall back to the raw series when no seasonal adjustment is available.
    base = s if deseasonalized is None else deseasonalized
    residual = base - trend

    outlier_cap = int(len(residual) / 2 - 1)
    esd_result = generalizedESD(residual,
                                maxOLs=outlier_cap,
                                alpha=alpha,
                                hybrid=hybrid)
    outliers = esd_result[1]

    return (seasons, trend, residual, outliers)
Ejemplo n.º 9
0
 def SeasonalDecompose(self):
     """Decompose ``self.ts`` into trend, seasonal and residual parts.

     Calls ``fit_seasons`` with the instance's configuration and stores
     ``seasons``, ``tendency`` (the trend) and ``window``, plus
     ``detrended = ts - tendency``.

     When seasons are found, ``adjusted``, ``residual`` and ``fullseasons``
     are computed.  When none are found, ``adjusted``, ``residual``,
     ``detrendednoise``, ``trendseasonal`` and ``fullseasons`` are set to
     ``None``.
     """
     self.seasons, self.tendency, self.window = fit_seasons(
         self.ts,
         trend=self.trend,
         period=self.period,
         ptimes=self.yearfac,
         splineseason=self.prefilterseason,
         forceseason=self.forceseason,
         kernel=self.kernel)
     self.detrended = self.ts - self.tendency
     if self.seasons is None:
         # A stray undefined name used to sit here and raised NameError,
         # so the assignment below could never run.
         self.adjusted = self.residual = self.detrendednoise = self.trendseasonal = None
         # Keep the attribute defined on both branches.
         self.fullseasons = None
     else:
         self.adjusted = adjust_seasons(self.ts,
                                        seasons=self.seasons,
                                        period=self.period)
         self.residual = self.adjusted - self.tendency
         # Seasonal component expanded to the full length of the series.
         self.fullseasons = self.ts - self.tendency - self.residual
Ejemplo n.º 10
0
    def fit(self, period=7, alpha=0.025):
        """
        Deseasonalize the data (optionally) and collect outliers via self.grubbs.

        The input is ``self.detrended`` when set, otherwise ``self.series``.
        When ``period`` is not None the data is decomposed with
        ``seasonal.fit_seasons`` / ``seasonal.adjust_seasons`` using a spline
        trend; a ValueError is raised if no seasons come back.

        Attributes written:
        self.alpha:     the ``alpha`` argument
        self.seasons:   seasons returned by seasonal.fit_seasons
        self.trend:     fitted trend as a Series on self.series.index
        self.adjusted:  data after the seasonal adjustment
        self.residual:  adjusted minus trend; the input data when it was None
        self.outliers:  list reset here before the two self.grubbs calls
                        (presumably filled by them -- confirm in grubbs)
        self.indices:   self.outliers converted to a numpy array
        self.anomaly:   boolean array of data.size, True at self.outliers

        Returns self.
        """
        self.alpha = alpha
        if self.detrended is None:
            data = self.series
        else:
            data = self.detrended

        if period is not None:
            trend_method = 'spline'
            fitted = seasonal.fit_seasons(data,
                                          periodogram_thresh=0.5,
                                          trend=trend_method,
                                          period=period)
            self.seasons, trend = fitted
            self.trend = pd.Series(trend, index=self.series.index)
            if self.seasons is None:
                message = (
                    'period {} seasonality could not be extracted from the data'
                    .format(period))
                raise ValueError(message)
            self.adjusted = seasonal.adjust_seasons(data,
                                                    seasons=self.seasons,
                                                    trend=trend_method)
            self.residual = self.adjusted - self.trend

        self.residual = self.residual if self.residual is not None else data
        self.outliers = []
        # One pass per index picker (minimum first, then maximum); every
        # pass receives its own fresh float copy of the residual values.
        for picker_name in ('grubbs_min_index_g', 'grubbs_max_index_g'):
            self.grubbs(self.residual.copy().astype(float).values,
                        getattr(self, picker_name))

        self.indices = np.array(self.outliers)
        anomaly_mask = np.zeros(data.size, dtype=bool)
        self.anomaly = anomaly_mask
        self.anomaly[self.outliers] = True
        return self
Ejemplo n.º 11
0
def test_trend_spline():
    """Adjusting DATA with a spline trend must lower its standard deviation."""
    deseasonalized = adjust_seasons(DATA, trend="spline")
    assert deseasonalized.std() < DATA.std()
Ejemplo n.º 12
0
def test_trend_seasons():
    """Adjusting DATA with a line trend and SEASONS must lower its std."""
    deseasonalized = adjust_seasons(DATA, trend="line", seasons=SEASONS)
    assert deseasonalized.std() < DATA.std()
Ejemplo n.º 13
0
def test_trend_period():
    """Adjusting DATA with a line trend and PERIOD must lower its std."""
    deseasonalized = adjust_seasons(DATA, trend="line", period=PERIOD)
    assert deseasonalized.std() < DATA.std()
Ejemplo n.º 14
0
def test_explicit_trend():
    """A precomputed line trend passed to adjust_seasons must lower DATA's std."""
    line_trend = fit_trend(DATA, kind="line")
    deseasonalized = adjust_seasons(DATA, trend=line_trend)
    assert deseasonalized.std() < DATA.std()
Ejemplo n.º 15
0
def test_auto():
    """adjust_seasons with all defaults must lower DATA's standard deviation."""
    deseasonalized = adjust_seasons(DATA)
    assert deseasonalized.std() < DATA.std()
def seasonal_decomp(input_data, figure=True):
    '''Split a series into trend, residual and seasonal components.

        input_data : np.ndarray, pd.Series or other 1-D array-like
        figure     : plot the input and its components when truthy

        output:
            trend: trend sequence
            residual: what is left after removing trend and seasons
            seasonal: seasonal component repeated to the input length
                      (all zeros when no seasonality is found)
            seasonal_period: period of the seasonal series
                      (0 when no seasonality is found)

        When input_data is a pd.Series the three sequences are returned as
        pd.Series sharing its index.

        ---
        adjusted = input_ts - seasonal
        residual = adjusted - trend

        input_ts = adjusted + seasonal
                 = residual + trend + seasonal
    '''
    input_tseries = np.array(input_data)

    seasons, trend = pkg_seasonal.fit_seasons(input_tseries)
    if seasons is None:
        # No seasonality detected: the seasonal component is identically
        # zero and the adjusted series is the input itself.
        period = 0
        adjusted = input_tseries
        eseas = np.zeros(input_tseries.shape, dtype=float)
    else:
        period = seasons.size
        adjusted = pkg_seasonal.adjust_seasons(input_tseries, seasons=seasons)
        # Repeat the seasons cyclically and cut to exactly the input length,
        # so the component sum below has matching shapes even when the
        # length is not a multiple of the period.
        eseas = np.resize(seasons, input_tseries.size)
    residual = adjusted - trend

    print('$$ period of seasonal data = ', period)

    # Sanity check: the components must add back up to the input.
    recon = trend + residual + eseas
    diff = np.abs(recon - input_tseries)
    min_error_th = 1E-12
    if np.max(diff) > min_error_th:
        print(
            '!! bug: The reconstruction error should be almost zero, '
            'but max={}'.format(np.max(diff)))

    if figure:
        rcParams['figure.figsize'] = 15, 7
        plt.figure()
        plt.grid()
        plt.plot(input_tseries, label='input', color='blue')
        plt.plot(trend, label='trend', color='red')
        plt.plot(residual, label='residual', color='black')
        plt.plot(eseas, label='seasonal', color='green')
        plt.legend(loc='best')

    if isinstance(input_data, pd.Series):
        print('$$ output type conversion to {}'.format(type(input_data)))
        trend = pd.Series(trend, index=input_data.index, name='Trend')
        residual = pd.Series(residual, index=input_data.index, name='Residual')
        eseas = pd.Series(eseas, index=input_data.index, name='Seasonal')

    return trend, residual, eseas, period
Ejemplo n.º 17
0
# Collect the non-NaN temperature readings (s) together with an index label
# for each kept reading (t_s), then flag residuals of the seasonal
# decomposition that exceed alpha standard deviations.
s = []
t_s = []
j = 0

for i in df_cut['temperature_station_x']:
    # NaN readings are skipped; j still advances so it tracks the row position.
    if str(i) != 'nan':
        s.append(i)
        # NOTE(review): the label is taken from df.index while the values come
        # from df_cut -- confirm both frames share the same row order.
        t_s.append(df.index[j])
    j=j+1

a = np.array(s)

# Fit seasons and trend on the cleaned readings.
seasons, trend = fit_seasons(a)

adjusted = adjust_seasons(a, seasons=seasons)

# What remains after removing both the seasonal component and the trend.
residual = adjusted - trend


# Check the position in time of the anomalies.
# NOTE(review): alpha is not defined in this snippet; only residuals above
# +alpha standard deviations are flagged (large negative ones are not).
b = residual > np.std(residual)*alpha

j = 0
p_t = []

# Pair every flagged reading with its index label.
for i in b:
    if i==True:
        p_t.append((s[j],t_s[j]))
    j=j+1
Ejemplo n.º 18
0
import math
import numpy as np
from seasonal import fit_seasons, adjust_seasons
import matplotlib.pyplot as plt

# Demo: decompose a synthetic series with fit_seasons / adjust_seasons and
# plot the pieces, first for clean data and then with Gaussian noise added.

# make a trended sine wave: amplitude 10, period 25 samples, plus a
# quadratic trend i*i/100
s = [10 * math.sin(i * 2 * math.pi / 25) + i * i / 100.0 for i in range(100)]

# detrend and deseasonalize
seasons, trend = fit_seasons(s)
adjusted = adjust_seasons(s, seasons=seasons)
residual = adjusted - trend

# visualize results
plt.figure()
plt.plot(s, label='data')
plt.plot(trend, label='trend')
# seasons is whatever fit_seasons returned as its first value, plotted as-is
plt.plot(seasons, label='season_period')
plt.plot(residual, label='residual')
plt.legend(loc='upper left')
plt.show()

# how about with some noise?
# zero-mean Gaussian noise with standard deviation 5, one draw per sample
noisy = s + np.random.normal(0, 5, len(s))
seasons, trend = fit_seasons(noisy)
adjusted = adjust_seasons(noisy, seasons=seasons)
residual = adjusted - trend

plt.figure()
plt.plot(noisy, label='noisy')
# noisy minus residual leaves the fitted trend plus seasonal component
plt.plot(noisy - residual, label='trend+season')
Ejemplo n.º 19
0
def test_period():
    """Check adjust_seasons with the reference PERIOD and with half of it."""
    with_full_period = adjust_seasons(DATA, period=PERIOD)
    assert with_full_period.std() < DATA.std()

    # Half the period is expected to show no seasonality, hence None.
    with_half_period = adjust_seasons(DATA, period=PERIOD // 2)
    assert with_half_period is None
Ejemplo n.º 20
0
def test_seasons():
    """Adjusting DATA with the known SEASONS must lower its standard deviation."""
    deseasonalized = adjust_seasons(DATA, seasons=SEASONS)
    assert deseasonalized.std() < DATA.std()
Ejemplo n.º 21
0
        data = input_var.data
        nt, ny, nx = data.shape
        data = data.reshape(nt, ny * nx)
        tmax, ngrid = data.shape
        diur = np.zeros((8, ngrid))  # _number of hours = 3 hourly output
        for hr in np.arange(8):
            idx = np.arange(hr, tmax, 8)
            diur[hr, :] = data[idx].mean(axis=0)
        day_cyc = np.reshape(diur, (8, ny, nx))
        diur = np.tile(diur, [int(len(input_var.coord('t').points) / 8.), 1
                              ])  # 24 hours in 37 days
        data = (data - diur).reshape(nt, ny, nx)
    else:
        data = input_var.values
        diur = np.zeros((24))
        for hr in np.arange(24):
            idx = np.arange(hr, len(data), 24)
            diur[hr] = data[idx].mean(axis=0)
        day_cyc = np.reshape(diur, (24))
        diur = np.tile(diur, [int(len(data) / 24.), 1])
    return data, day_cyc


from seasonal import fit_seasons, adjust_seasons
# Decompose the 'Ts' series at the grid point selected by the 'AWS14'
# entries of lon_dict / lat_dict.
# NOTE(review): this call indexes full_srs['Ts'].data, while the calls below
# index full_srs['Ts'] first and then take .data -- presumably equivalent;
# confirm against the type of full_srs['Ts'].
seasons, trend = fit_seasons(full_srs['Ts'].data[:, lon_dict['AWS14'],
                                                 lat_dict['AWS14']])
adjusted = adjust_seasons(full_srs['Ts'][:, lon_dict['AWS14'], lat_dict['AWS14']].data,
                          seasons=seasons)
# seas is the detrended series (data minus trend); it still contains the
# residual, not just the seasonal component.
seas = full_srs['Ts'][:, lon_dict['AWS14'], lat_dict['AWS14']].data - trend
# What is left after removing both the seasonal component and the trend.
residual = adjusted - trend