Code example #1
File: timeseries.py Project: tannishk/data-profiling
def decompose(df,col,freq):
    "To plot the decomposition graphs "
    decomposed = seasonal_decompose(df[col].values, freq=freq)
    pd.DataFrame(decomposed.observed).plot(figsize=(12,4), title = "Observed")
    pd.DataFrame(decomposed.trend).plot(figsize=(12,4), title = "Trend")
    pd.DataFrame(decomposed.seasonal).plot(figsize=(12,4), title = "Seasonal")
    pd.DataFrame(decomposed.resid).plot(figsize=(12,4), title = "Residuals")
Code example #2
File: test_seasonal.py Project: 5267/statsmodels
    def test_pandas(self):
        res_add = seasonal_decompose(self.data, freq=4)
        freq_override_data = self.data.copy()
        freq_override_data.index = DatetimeIndex(start='1/1/1951', periods=len(freq_override_data), freq='A')
        res_add_override = seasonal_decompose(freq_override_data, freq=4)
        seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                    -60.25, 62.46, 86.17, -88.38, -60.25, 62.46, 86.17,
                    -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                     62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                    -60.25, 62.46, 86.17, -88.38, -60.25]
        trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75,
                 451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
                 316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
                 191.00, 166.88, 72.00, -9.25, -33.12, -36.75, 36.25,
                 103.00, 131.62, np.nan, np.nan]
        random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
                  -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
                  -33.246, -27.746, 46.165, -57.924, 28.004, -36.746,
                  -37.585, 151.826, -75.496, 86.254, -10.210, -194.049,
                  48.129, 11.004, -40.460, 143.201, np.nan, np.nan]
        assert_almost_equal(res_add.seasonal.values.squeeze(), seasonal, 2)
        assert_almost_equal(res_add.trend.values.squeeze(), trend, 2)
        assert_almost_equal(res_add.resid.values.squeeze(), random, 3)
        assert_almost_equal(res_add_override.seasonal.values.squeeze(), seasonal, 2)
        assert_almost_equal(res_add_override.trend.values.squeeze(), trend, 2)
        assert_almost_equal(res_add_override.resid.values.squeeze(), random, 3)
        assert_equal(res_add.seasonal.index.values.squeeze(),
                            self.data.index.values)

        res_mult = seasonal_decompose(np.abs(self.data), 'm', freq=4)
        res_mult_override = seasonal_decompose(np.abs(freq_override_data), 'm', freq=4)
        seasonal = [1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716,
                    0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538,
                    0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815,
                    1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931,
                    1.0815, 1.5538, 0.6716, 0.6931]
        trend = [np.nan, np.nan, 171.62, 204.00, 221.25, 245.12, 319.75,
                 451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
                 316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
                 191.00, 166.88, 107.25, 80.50, 79.12, 78.75, 116.50,
                 140.00, 157.38, np.nan, np.nan]
        random = [np.nan, np.nan, 1.29263, 1.51360, 1.03223, 0.62226,
                  1.04771, 1.05139, 1.20124, 0.84080, 1.28182, 1.28752,
                  1.08043, 0.77172, 0.91697, 0.96191, 1.36441, 0.72986,
                  1.01171, 0.73956, 1.03566, 1.44556, 0.02677, 1.31843,
                  0.49390, 1.14688, 1.45582, 0.16101, 0.82555, 1.47633,
                  np.nan, np.nan]

        assert_almost_equal(res_mult.seasonal.values.squeeze(), seasonal, 4)
        assert_almost_equal(res_mult.trend.values.squeeze(), trend, 2)
        assert_almost_equal(res_mult.resid.values.squeeze(), random, 4)
        assert_almost_equal(res_mult_override.seasonal.values.squeeze(), seasonal, 4)
        assert_almost_equal(res_mult_override.trend.values.squeeze(), trend, 2)
        assert_almost_equal(res_mult_override.resid.values.squeeze(), random, 4)
        assert_equal(res_mult.seasonal.index.values.squeeze(),
                            self.data.index.values)
Code example #3
    def test_pandas_nofreq(self):
        # issue #3503
        nobs = 100
        dta = pd.Series([x % 3 for x in range(nobs)] + np.random.randn(nobs))
        res_np = seasonal_decompose(dta.values, freq=3)
        res = seasonal_decompose(dta, freq=3)

        atol = 1e-8
        rtol = 1e-10
        assert_allclose(res.seasonal.values.squeeze(), res_np.seasonal,
                        atol=atol, rtol=rtol)
        assert_allclose(res.trend.values.squeeze(), res_np.trend,
                        atol=atol, rtol=rtol)
        assert_allclose(res.resid.values.squeeze(), res_np.resid,
                        atol=atol, rtol=rtol)
Code example #4
File: tsa.py Project: aarora79/sitapt
def _create_grid_plot_of_trends(df, X, col_list, filename):

    width  = 600
    height = 400
        
    color_palette = [ 'Black', 'Red', 'Purple', 'Green', 'Brown', 'Yellow', 'Cyan', 'Blue', 'Orange', 'Pink']
    i = 0
    #2 columns, so number of rows is total /2 
    row_index = 0
    row_list = []
    row = []
    for col in col_list[1:]: #skip the date column
        # create a new plot
        s1 = figure(x_axis_type = 'datetime', width=width, plot_height=height, title=col + ' trend')
        #seasonal decompose to extract seasonal trends
        decomposition = seasonal_decompose(np.array(df[col]), model='additive', freq=15)  
        s1.line(X, decomposition.trend, color=color_palette[i % len(color_palette)], alpha=0.5, line_width=2)

        row.append(s1)
        if len(row) == 2:
            row_copy = copy.deepcopy(row)
            row_list.append(row_copy)
            row = []
            i = 0
        i += 1
        

    # put all the plots in a grid layout
    p = gridplot(row_list)

    save(vplot(p), filename=filename, title='trends')  
Code example #5
File: timeseries.py Project: greatObelix/datatoolbox
 def make_stationary(self):
     # remove trend and seasonality 
     #for positive trend, to penalize higher values use log/square root/cube root etc...
     self.ts_log = np.log(self.df)
     
     #estimate or model the trend, then remove it from the series. different approaches:
     # aggregation: take avg for monthly/weekly avg
     # smooth: taking rolling avg
     # poly fit : fit a regression model
     
     # Example 1: using smoothing as example, rolling avg
     moving_avg = pd.rolling_mean(self.ts_log,window=287)
     ts_log_moving_avg_diff = self.ts_log - moving_avg
     ts_log_moving_avg_diff.dropna(inplace=True)
     
     # Example 2: using exponential weighted moving avg (EWMA)
     # halflife is same as window, how many datapoint to make up 1 cycle
     expwighted_avg = pd.ewma(self.ts_log, halflife=287)
     ts_log_ewma_diff = self.ts_log - expwighted_avg
     
     # Example 3: differencing: take the difference of the observation at a particular instant 
     # with that at the previous instant
     self.ts_log_diff = self.ts_log - self.ts_log.shift()
     
     # Example 4: decomposing
     # trend and seasonality are modeled separately and the remaining part of the series is returned
     # pandas.DataFrame with index doesn't work, need to pass in numpy values as dataframe.values
     decomposition = seasonal_decompose(self.ts_log.values, freq=288)
     trend = decomposition.trend
     seasonal = decomposition.seasonal
     residual = decomposition.resid
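
# Note: pd.rolling_mean and pd.ewma used above were removed in later pandas releases.
# A minimal sketch of the modern equivalents on a toy series (illustration only, not
# from the original project):
import numpy as np
import pandas as pd

ts = pd.Series(np.log(np.arange(1.0, 601.0)))    # stand-in for a log-transformed series
rolling_avg = ts.rolling(window=287).mean()      # replaces pd.rolling_mean(ts, window=287)
ewma_avg = ts.ewm(halflife=287).mean()           # replaces pd.ewma(ts, halflife=287)
print((ts - rolling_avg).dropna().head())
print((ts - ewma_avg).head())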
Code example #6
File: VARMA.py Project: manuwhs/Trapyng
def seasonal_decompose(timeSeries, freq = 34):
    # Seasonal decomposition using moving averages
    decomposition = tsa_seasonal.seasonal_decompose(timeSeries, freq = freq)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    return [trend, seasonal, residual]
Code example #7
File: timeseries.py Project: mkgunasinghe/examples
def decomp(ts):
	decomposition = seasonal_decompose(ts[Y_name])
	fig = decomposition.plot() 
	plt.tight_layout()
	fig.savefig('decomp.png', bbox_inches="tight")
	trend = decomposition.trend
	seasonal = decomposition.seasonal
	resid = decomposition.resid
Code example #8
File: test_seasonal.py Project: cong1989/statsmodels
    def test_interpolate_trend(self):
        x = np.arange(6)
        trend = seasonal_decompose(x, freq=2).trend
        assert_equal(trend[0], np.nan)

        trend = seasonal_decompose(x, freq=2, extrapolate_trend=1).trend
        assert_almost_equal(trend, x)

        trend = seasonal_decompose(x, freq=2, extrapolate_trend='freq').trend
        assert_almost_equal(trend, x)

        # 2d case
        x = np.tile(np.arange(6), (2, 1)).T
        trend = seasonal_decompose(x, freq=2, extrapolate_trend=1).trend
        assert_almost_equal(trend, x)

        trend = seasonal_decompose(x, freq=2, extrapolate_trend='freq').trend
        assert_almost_equal(trend, x)
Code example #9
File: process.py Project: pthaike/comp
def decompose_pre(ts):
	ts_log = np.log(ts)
	decomposition = seasonal_decompose(ts_log.values, freq = 24)
	# decomposition.plot()
	# plt.show(block= False)
	ts_log_decompose = ts_log
	ts_log_decompose.plays = decomposition.resid
	# print ts_log_decompose
	ts_log_decompose.dropna(inplace = True)
	stationarity_test(ts_log_decompose)
	return ts_log_decompose
Code example #10
File: timeseries.py Project: tannishk/data-profiling
def freq(df,col,max1):
    "To find the required freq for the decompostion "

    count = None
    for i in range(1,max1):
        try:
            decomposed = seasonal_decompose(df[col].values, freq=i)
            decomposed.resid = decomposed.resid[~np.isnan(decomposed.resid)]
            print(decomposed.resid)
        ##decomposed.resid = [1,2,1,2,1,2]
            x = np.array(decomposed.resid)
            z,p = stats.kstest(x,'norm')
            if(p<0.055):
                print('It is not the required freq')
            else:
                print('it is the required freq')
                count = i
        except ValueError:
            pass
    decompose(df,col,i)
    return count
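
# A brief usage sketch for the helpers above (illustration only: the DataFrame and column
# are made up, and the module-level imports of timeseries.py -- pandas, numpy, scipy.stats
# and seasonal_decompose -- plus decompose() from code example #1 are assumed):
import numpy as np
import pandas as pd

demo = pd.DataFrame({'value': np.sin(np.arange(120) * 2 * np.pi / 12) + 0.1 * np.random.randn(120)})
best = freq(demo, 'value', 13)   # scan candidate frequencies 1..12
print('last frequency whose residuals looked normal:', best)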
Code example #11
File: test_seasonal.py Project: 5267/statsmodels
 def test_filt(self):
     filt = np.array([1/8., 1/4., 1./4, 1/4., 1/8.])
     res_add = seasonal_decompose(self.data.values, filt=filt, freq=4)
     seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                 -60.25, 62.46, 86.17, -88.38, -60.25, 62.46, 86.17,
                 -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                  62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                 -60.25, 62.46, 86.17, -88.38, -60.25]
     trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75,
              451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
              316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
              191.00, 166.88, 72.00, -9.25, -33.12, -36.75, 36.25,
              103.00, 131.62, np.nan, np.nan]
     random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
               -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
               -33.246, -27.746, 46.165, -57.924, 28.004, -36.746,
               -37.585, 151.826, -75.496, 86.254, -10.210, -194.049,
               48.129, 11.004, -40.460, 143.201, np.nan, np.nan]
     assert_almost_equal(res_add.seasonal, seasonal, 2)
     assert_almost_equal(res_add.trend, trend, 2)
     assert_almost_equal(res_add.resid, random, 3)
Code example #12
XGB_result.index = XGB_result.index.astype('string')
XGB_result.drop(['Theoretical'],axis=1,inplace=True)
XGB_result=XGB_result.T
XGB_result.to_json('Muppandal_Predictions.json', orient='records')

end_time = datetime.now()
print("Time required to run a single script:", end_time - start_time)

"""# Dilated CNN"""

from pmdarima.arima import auto_arima

from statsmodels.tsa.seasonal import seasonal_decompose
plt.figure(figsize=(14,6))
temp_t = df['Energy'][7200:7600]
decompose = seasonal_decompose(temp_t,model= 'add')
decompose.plot();

from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing
alpha = 0.1


EXP_df = df['Energy']
train_bound = EXP_df.shape[0]-48*1
EXP_df_train = EXP_df[:train_bound]
EXP_df_test = EXP_df[-48:]

forecast_length = 48
EXP_results = pd.DataFrame()
EXP_results['Theoretical']=EXP_df_test
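
# The excerpt stops before the smoothing model is fit; a minimal self-contained example of
# SimpleExpSmoothing with a fixed smoothing level like the alpha above (the toy series is
# made up for illustration, not the notebook's Energy data):
import pandas as pd
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

toy = pd.Series([112.0, 118, 132, 129, 121, 135, 148, 148, 136, 119],
                index=pd.date_range('2020-01-01', periods=10, freq='D'))
ses_fit = SimpleExpSmoothing(toy).fit(smoothing_level=0.1, optimized=False)
print(ses_fit.forecast(3))   # SES forecasts are flat at the last smoothed level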
Code example #13
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator

df = pd.read_csv('./statmodel/Data/Alcohol_Sales.csv',
                 index_col='DATE',
                 parse_dates=True)
df.index.freq = 'MS'
df.columns = ['Sales']
df.plot()
plt.show()
result = seasonal_decompose(df['Sales'])
result.plot()
plt.show()

train = df.iloc[:313]
test = df.iloc[313:]
print(len(test))
scaler = MinMaxScaler()
scaler.fit(train)  # finds the min/max values on the training data set
scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)

n_input = 2
n_features = 1
generator = TimeseriesGenerator(scaled_train,
                                scaled_train,
Code example #14
File: timeSeriesPy.py Project: drapadubok/Playground
#Weighted MA, when we don't know the period, adjust parameters when necessary
# http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions
expweighted_MA = data_log.ewm(min_periods=0, 
                              adjust=True, 
                              ignore_na=False, 
                              halflife=12).mean()
data_log_ewma = data_log - expweighted_MA
test_stationarity(data_log_ewma)

# Differencing, first order, subtract t+1 from t
data_log_diff = data_log - data_log.shift()
plt.plot(data_log_diff)

# Decomposition, could be very useful but need to understand better how to add back to the forecast
decomposition = seasonal_decompose(data_log)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid # what is left after removing trend and seasonal
plt.subplot(411)
plt.plot(data_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
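
# The comment above wonders how to add the components back for a forecast; for an additive
# model they simply sum to the observed series. A minimal self-contained check (the toy
# monthly series is made up for illustration):
import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

idx = pd.date_range('2015-01-01', periods=48, freq='MS')
toy = pd.Series(np.arange(48) * 0.5 + np.sin(np.arange(48) * 2 * np.pi / 12), index=idx)
dec = seasonal_decompose(toy, model='additive')
recombined = (dec.trend + dec.seasonal + dec.resid).dropna()  # edge values are NaN from the centred MA
print(np.allclose(recombined, toy.loc[recombined.index]))     # True: trend + seasonal + resid == observed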
Code example #15
def make_stationary(time_series_data):
    """
    One of the first tricks to reduce trend can be transformation. For example, in this case we can clearly see that
    there is a significant positive trend. So we can apply a transformation which penalizes higher values more than
    smaller values, such as taking a log, square root, cube root, etc.
    :param time_series_data:
    :return:
    """
    ts_log = np.log(time_series_data)
    plt.plot(ts_log)
    plt.show()
    ##The visible forward trend needs to be now removed from the data (right now the trend and noise are present

    ##Estimating trend from the data
    ##Aggregation - taking average for a time period like monthly/weekly averages
    ##Smoothing - taking rolling averages
    ##Polynomial Fitting - fit a regression model

    ##Smoothing
    #we take average of k consecutive values depending on the frequency of time series. Here we can take the average
    # over the past 1 year, i.e. last 12 values.
    moving_avg = pd.rolling_mean(ts_log, 12)
    plt.plot(moving_avg, color = "green")
    plt.plot(ts_log, color = "red")
    plt.show()
    #since we are taking average of last 12 values, rolling mean is not defined for first 11 values. This can be
    # observed as
    ts_log_moving_avg_diff = ts_log-moving_avg
    print(ts_log_moving_avg_diff.head(14))
    #Let us drop the first 11 NAN values
    avg_diff = ts_log_moving_avg_diff.dropna()

    stationary(avg_diff)
    result = dickey_fuller(avg_diff["#Passengers"])
    print(result)
    ##Now we can see that rolling values appear to be varying slightly but there is no specific trend. Also, the test
    # statistic is smaller than the 5% critical values so we can say with 95% confidence that this is a stationary series.

    # However, a drawback in this particular approach is that the time-period has to be strictly defined. In this case
    # we can take yearly averages but in complex situations like forecasting a stock price, it's difficult to come up
    # with a number. So we take a 'weighted moving average' where more recent values are given a higher weight. There
    # can be many techniques for assigning weights. A popular one is the exponentially weighted moving average where weights
    # are assigned to all the previous values with a decay factor
    weight_ma = pd.ewma(ts_log, halflife = 12)
    ts_log_weight_diff = ts_log-weight_ma
    plt.plot(weight_ma, color="green")
    plt.plot(ts_log, color="red")
    plt.show()
    stationary(ts_log_weight_diff)
    #This TS has even lesser variations in mean and standard deviation in magnitude. Also, the test statistic is smaller
    # than the 1% critical value, which is better than the previous case. Note that in this case there will be no
    # missing values as all values from starting are given weights. So it'll work even with no previous values.

    ##Removing trend and seasonality from a highly seasonal data
    #Differencing - taking the difference with a particular time lag
    #Decomposition - modeling both trend and seasonality and removing them from the model
    #1. differencing
    ts_log_diff = ts_log - ts_log.shift()
    plt.plot(ts_log_diff)
    plt.show()
    #trend seems to have been reduced significantly
    # print(ts_log_diff)  # first value is unknown because it is estimated by shifting
    ts_log_diff.dropna(inplace = True)
    stationary(ts_log_diff)
    #Dickey-Fuller test statistic is less than the 10% critical value, thus the TS is stationary with 90% confidence.
    # We can also take second or third order differences which might get even better results in certain applications.
    ts_log_diff2 = ts_log - ts_log.shift(periods = 2)
    plt.plot(ts_log_diff2)
    plt.show()
    # trend seems to have been reduced significantly
    # print(ts_log_diff2)  # first value is unknown because it is estimated by shifting
    ts_log_diff2.dropna(inplace=True)
    stationary(ts_log_diff2)

    ##2. Decomposing
    # both trend and seasonality are modeled separately and the remaining part of the series is returned.
    decomp = seasonal_decompose(ts_log)
    trend =  decomp.trend
    season = decomp.seasonal
    residual = decomp.resid
    plt.subplot(411)
    plt.plot(ts_log, label='Original')
    plt.legend(loc='best')
    plt.subplot(412)
    plt.plot(trend, label='Trend')
    plt.legend(loc='best')
    plt.subplot(413)
    plt.plot(season, label='Seasonality')
    plt.legend(loc='best')
    plt.subplot(414)
    plt.plot(residual, label='Residuals')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()

    #Lets check stationarity of residuals:
    ts_log_decompose = residual
    ts_log_decompose.dropna(inplace=True)
    stationary(ts_log_decompose)
    #The Dickey-Fuller test statistic is significantly lower than the 1% critical value. This TS is close to stationary.
    return ts_log
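
# The stationary() and dickey_fuller() helpers called above are not shown in this excerpt;
# a minimal sketch of what they might look like, using statsmodels' adfuller (the helper
# names mirror the calls above and are otherwise assumptions):
from statsmodels.tsa.stattools import adfuller

def dickey_fuller(series):
    # Augmented Dickey-Fuller test: return the statistic, p-value and critical values
    stat, pvalue, _, _, crit, _ = adfuller(series.dropna(), autolag='AIC')
    return {'test_statistic': stat, 'p_value': pvalue, 'critical_values': crit}

def stationary(series):
    # rough check: reject the unit-root null hypothesis at the 5% level
    result = dickey_fuller(series)
    print(result)
    print('looks stationary' if result['p_value'] < 0.05 else 'looks non-stationary')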
Code example #16
### DELETE OUTLIERS
thre=1.3
delete=np.where(resid9<np.mean(resid9)-thre*np.std(resid9))[0]

train0=np.delete(np.array(dataframe.ix[:,0]),delete)
train=np.sqrt(train0)


plt.hist(train)

rollmean = pd.rolling_mean(train, window=20)
rollstd = pd.rolling_std(train, window=20)

ts_log0 = np.log(train)
ts_log=pd.DataFrame(ts_log0).dropna()
decomposition = seasonal_decompose(np.array(ts_log).reshape(len(ts_log),),freq=100)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

z=np.where(seasonal==min(seasonal))[0]
period=z[2]-z[1]

look_back = period

plt.figure(figsize=(8,8))
plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='upper left')
plt.subplot(412)
Code example #17
 def test_2d(self):
     x = np.tile(np.arange(6), (2, 1)).T
     trend = seasonal_decompose(x, freq=2).trend
     expected = np.tile(np.arange(6, dtype=float), (2, 1)).T
     expected[0] = expected[-1] = np.nan
     assert_equal(trend, expected)
Code example #18
    mse = mean_squared_error(valid, model_predictions)
    print('MSE: %f' % mse)
    print("Calcoliamo  MAE=%.4f" % (sum(abs(errore)) / len(errore)))

    # %%
    # Let's try using ETS, applying it to the trend and seasonality components.
    # For the residuals, being a white-noise series (with no components),
    # ARIMA is used, because with ETS we could at best use Simple Exponential Smoothing,
    # which does not produce satisfactory forecasts.
    #
    # NOTE: adding the residuals back here makes little difference.

    # Decompose the time series.
    # two_sided=False means that the moving average (process described in the notebook)
    # is computed from past values only instead of being centered as usual.
    decomposition = seasonal_decompose(train, period=year, two_sided=False)

    # Retrieve the components
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    # Remove any NaN values from the series
    trend.dropna(inplace=True)
    seasonal.dropna(inplace=True)
    residual.dropna(inplace=True)

    # Create models for the trend and seasonal components
    # USE ARIMA FOR THE RESIDUALS SINCE THEY ARE A STATIONARY COMPONENT

    trend_model = ExponentialSmoothing(trend,
Code example #19
    adft = adfuller(timeseries, autolag='AIC')
    # adfuller returns its values without labels,
    # so we label them manually using a Series and a for loop
    output = pd.Series(adft[0:4],
                       index=[
                           'Test Statistics', 'p-value', 'No. of lags used',
                           'Number of observations used'
                       ])
    for key, values in adft[4].items():
        output['critical value (%s)' % key] = values
    print(output)


test_stationarity(df_close)

result = seasonal_decompose(df_close, model='multiplicative', freq=30)
fig = plt.figure()
fig = result.plot()
fig.set_size_inches(16, 9)

from pylab import rcParams
rcParams['figure.figsize'] = 10, 6
df_log = np.log(df_close)
moving_avg = df_log.rolling(12).mean()
std_dev = df_log.rolling(12).std()
plt.legend(loc='best')
plt.title('Moving Average')
plt.plot(std_dev, color="black", label="Standard Deviation")
plt.plot(moving_avg, color="red", label="Mean")
plt.legend()
plt.show()
Code example #20
import os
os.chdir(r"C:\Users\Lenovo\Desktop\umeed\csv files")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

# Read the AirPassengers dataset
airline = pd.read_csv('dairy.csv', index_col='Month', parse_dates=True)

# Print the first five rows of the dataset
airline.head()

# ETS Decomposition
result = seasonal_decompose(airline['#Passengers'],
                            model='multiplicative',
                            period=30)
# Import the library
from pmdarima import auto_arima

# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

# Fit auto_arima function to AirPassengers dataset
stepwise_fit = auto_arima(
    airline['#Passengers'],
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
Code example #21
  
from statsmodels.tsa.seasonal import seasonal_decompose

output = open('AIRMA_mars_tianchi_artist_plays_predict.csv','w')

for artist in artists:
    print(artist, len(daily_play[artist]))
    y_data =  daily_play[artist][-30:]
    l = len(y_data)
    dates_str = sm.tsa.datetools.date_range_str('2005m1',length=l)
    dates_all = sm.tsa.datetools.dates_from_range('2005m1', length=l)
    y = pd.Series(y_data, index=dates_all)
    plt.plot(y)

    
    decomposition = seasonal_decompose(y)

    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
    y_decompose = residual
    y_decompose.dropna(inplace=True)

    test_stat(y_decompose)


    # remove moving avg
    moving_avg = y.rolling(window=12,center=False).mean()
    y_moving_avg_diff = y - moving_avg
    y_moving_avg_diff.dropna(inplace=True)
    print "Stationarity for TS - moving avg:"
Code example #22
filename = 'ERA5_Arctic_clouds_1979_2019.csv'

df0 = pd.read_csv(filename, sep=',')
var0_raw = df0['Total cloud cover (%)']
var1_raw = df0['Total column cloud water (g m**-2)']
var0 = var0_raw[0:nmons]
var1 = var1_raw[0:nmons]
y0_raw = df0['Year']
m0_raw = df0['Month']
y0 = y0_raw[0:nmons]
m0 = m0_raw[0:nmons]

######Decompose time series data
# Time Series Decomposition
result_mul0 = seasonal_decompose(var0.values,
                                 model='additive',
                                 freq=12,
                                 two_sided=False)
deseason0 = var0.values / result_mul0.seasonal
detrend0 = var0.values - result_mul0.trend
re0 = result_mul0.resid

result_mul1 = seasonal_decompose(var1.values,
                                 model='additive',
                                 freq=12,
                                 two_sided=False)
deseason1 = var1.values / result_mul1.seasonal
detrend1 = var1.values - result_mul1.trend
re1 = result_mul1.resid

######Save data into csv file
dict = {
Code example #23
#visualise rolling statistics and standard deviation
da.plot_rollingStatistics(usd)
da.plot_rollingStatistics(brent)
da.plot_rollingStatistics(dax)
da.plot_rollingStatistics(nasdaq)
da.plot_rollingStatistics(nasdaq100)
da.plot_rollingStatistics(wti)
da.plot_rollingStatistics(xau)

import preprocessing as pp

usd_log = pp.log_transform(usd)

from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(usd_log)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(usd_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
Code example #24
# Code example of decomposition
# prints the four graphs for ONE defined Article

from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot

#define id to calculate (single examination)
art_id = [722]

# load and group data to monthly
grouped = group_by_frequence(get_dataframe(art_id))

# prepare data for training
series = grouped['Menge']

result = seasonal_decompose(series, model='additive')

# printing out the values for each series
#print(result.trend)
#print(result.seasonal)
#print(result.resid)
#print(result.observed)

# printing all as charts
result.plot()
pyplot.show()

# In[44]:

# import the data
#%%
view_hour.reset_index(inplace=True)
view_hour = view_hour.set_index('datetime')
view_hour.sort_index(inplace=True)
view_hour.info()

#%%
## Write out the csv file to the local directory
#view_hour.to_csv('/Users/swe03/view_hour.txt', index=True)

#%%
view_hour.describe()

#%%

decomposition = seasonal_decompose(view_hour['distinct_freq_sum'].values,freq=24 )  
  
fig = decomposition.plot()  
fig.set_size_inches(50, 8)

#%%
# Graph Autocorrelation and Partial Autocorrelation data
fig, axes = pplt.subplots(1, 2, figsize=(15,4))

fig = sm.graphics.tsa.plot_acf(view_hour['distinct_freq_sum'], lags=24, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(view_hour['distinct_freq_sum'], lags=24, ax=axes[1])

#%%
## Specify the SARIMAX model
## Default for the CI is 95%.  Set in the Alpha parameter for conf_int function
Code example #26
residual = np.zeros((datalimit2, np.shape(data)[3]))
#Plot param
plt.rcParams['figure.figsize'] = (10, 5)
#Initialize x and y
x_data = np.arange(0, datalimit2) / calendarYear
y_data_all = data[datalimit1:datalimit2, 0, 0, :]
#Choose scenarios
scenarios = [0, 1, 2, 4, 5, 6, 7, 9]  #range(np.shape(data)[3])

for i in scenarios:
    y_data = data[datalimit1:datalimit2, 0, 0, i]

    #==============================================================================
    #STEP0 - Identify, trend, seasonality, residual

    result = seasonal_decompose(y_data, freq=365, model='additive')

    #Fit linear trend
    #Get parameters: alpha and beta

    #Fit curve with lmfit
    line_mod = LinearModel(prefix='line_')
    pars_line = line_mod.guess(y_data, x=x_data)
    result_line_model = line_mod.fit(y_data, pars_line, x=x_data)
    print(result_line_model.fit_report())

    line_intercept[:, i] = result_line_model.params['line_intercept']._val
    line_slope[:, i] = result_line_model.params['line_slope']._val

    trend[:, i] = result_line_model.best_fit
    #==============================================================================
Code example #27
File: tsa.py Project: aarora79/sitapt
def model_feature(file_name, df, feature):
    #first create a directory by feature name to store the results
    file_name_wo_extn = file_name[:-4]
    dir_name = os.path.join(os.path.sep, os.getcwd(), OUTPUT_DIR_NAME, file_name_wo_extn, feature)
    if os.path.exists(dir_name):
        logger.info('dir name is ==> ' + dir_name)
        #delete existing directory if any
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    #temporarily change to the new feature directory
    curr_dir = os.getcwd()
    os.chdir(dir_name)

    #create  a string buffer to store all information about this feature which will then be written to a file at the end
    s = ''
    s = _write_to_string(s, '----------- Time Series Analysis for ' + feature + ' from ' + str(df['Date'][0]) + ' to ' + str(df['Date'][len(df['Date']) - 1]) + '-----------')
    #only look at the feature of interest as a univariate time series
    #x-axis is the time..
    X = np.array(df['Date'], dtype=np.datetime64)
    
    #df['First Difference'] = df[feature] - df[feature].shift()  
    y = np.array(df[feature] - df[feature].shift())
    _draw_multiple_line_plot('first_difference.html', 
                             feature, 
                             [X],
                             [y],
                             ['navy'], 
                             ['packets percentage delta'],
                             [None],
                             [1],
                             'datetime', 'Date', 'Packets Percentage Delta', y_start=-100, y_end=100)

    #calculate autocorelation and partial auto corelation for the first difference
    lag_correlations = acf(y[1:])  
    lag_partial_correlations = pacf(y[1:])  

    logger.info ('lag_correlations')
    logger.info(lag_correlations)

    s = _write_to_string(s, 'lag_correlations')
    s = _write_to_string(s, str(lag_correlations))

    y = lag_correlations
    _draw_multiple_line_plot('lag_correlations.html', 
                             'lag_correlations', 
                             [X],
                             [y],
                             ['navy'], 
                             ['lag_correlations'],
                             [None],
                             [1],
                             'datetime', 'Date', 'lag_correlations', y_start=-1, y_end=1)


    logger.info ('lag_partial_correlations')
    logger.info(lag_partial_correlations)
    s = _write_to_string(s, 'lag_partial_correlations')
    s = _write_to_string(s, str(lag_partial_correlations))

    y = lag_partial_correlations
    _draw_multiple_line_plot('lag_partial_correlations.html', 
                             'lag_partial_correlations', 
                             [X],
                             [y],
                             ['navy'], 
                             ['lag_partial_correlations'],
                             [None],
                             [1],
                             'datetime', 'Date', 'lag_partial_correlations', y_start=-1, y_end=1)

    #seasonal decompose to extract seasonal trends
    decomposition = seasonal_decompose(np.array(df[feature]), model='additive', freq=15)  
    _draw_decomposition_plot('decomposition.html', X, decomposition, 'seasonal decomposition', 'datetime', 'decomposition', width=600, height=400)


    #run various ARIMA models..and see which fits best...
    s, model_names, models, results, MAE = _try_ARIMA_and_ARMA_models(s, df, feature)

    #check if we got consistent output, all 4 variables returned by the prev function are
    # lists..they should be the same length
    len_list = [len(model_names), len(models), len(results), len(MAE)]
    if len(len_list) == len_list.count(len_list[0]):
        #looks consistent, all lengths are equal
        logger.info('_try_ARIMA_models output looks consistent, returns %d models ' % len(model_names))
    else:
        logger.info('_try_ARIMA_models output IS NOT consistent, returns %d model names ' % len(model_names))
        logger.info(len_list)
        logger.info('EXITING.....')
        sys.exit()

    s, predicted_dates, predicted, model_selection_list = _do_forecasts(df, feature, X, s, model_names, models, results, MAE)
    
    #write everything to file
    with open(feature + '.txt', "w") as text_file:
        text_file.write(s)
    #go back to parent directory
    os.chdir(curr_dir)

    #return the results
    return feature, model_names, models, results, MAE, predicted_dates, predicted, model_selection_list
Code example #28
                 upper_series,
                 color='k', alpha=.15)

plt.title("SARIMA - Final Forecast of a10 - Drug Sales")
plt.show()

# SARIMAX model with exogenous variable
# as an example, use seasonal index from last 36 months
# see how model looks when we force recent seasonal trend
# Compute Seasonal Index
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse

# multiplicative seasonal component
result_mul = seasonal_decompose(data['value'][-36:],   # 3 years
                                model='multiplicative',
                                extrapolate_trend='freq')

seasonal_index = result_mul.seasonal[-12:].to_frame()
seasonal_index['month'] = pd.to_datetime(seasonal_index.index).month

# merge with the base data
data['month'] = data.index.month
df = pd.merge(data, seasonal_index, how='left', on='month')
df.columns = ['value', 'month', 'seasonal_index']
df.index = data.index  # reassign the index.

sxmodel = pm.auto_arima(df[['value']], exogenous=df[['seasonal_index']],
                           start_p=1, start_q=1,
                           test='adf',
                           max_p=3, max_q=3, m=12,
Code example #29
File: main.py Project: PrieureDeSion/Randoms
ts_log_moving_avg_diff.dropna(inplace=True)  # Pandas in action :p
# after the above, make sure that the test_statistic is lesser than the critical value.
# For this you can run is_stationary again.
# is_stationary(ts_log_moving_avg_diff, 12)

expwighted_avg = pd.ewma(ts_log, halflife=12)
# Exponential weights make sure that recent observations have more importance

ts_log_ewma_diff = ts_log - expwighted_avg
# test_stationarity(ts_log_ewma_diff)
# On testing, apparently this has a lower test statistic value and hence
# better as a stationary series

from statsmodels.tsa.seasonal import seasonal_decompose

decomposition = seasonal_decompose(ts_log)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(ts_log, label="Original")
plt.legend(loc="best")
plt.subplot(412)
plt.plot(trend, label="Trend")
plt.legend(loc="best")
plt.subplot(413)
plt.plot(seasonal, label="Seasonality")
plt.legend(loc="best")
plt.subplot(414)
Code example #30
    plt.ylabel('#Shirts sold')
    plt.xlabel('Date')
    plt.plot(train, label="training set", color=TSC)
    plt.plot(valid, label="validation set", color =VSC, linestyle = '--')
    plt.plot(rolmean, color=OLC, label='Rolling Mean',  linewidth=3)
    plt.plot(rolstd, color=OLC, label='Rolling Std', linestyle = '--',  linewidth=3)
    plt.legend(loc='best')
    plt.show()
    
    mt.ac_pac_function(train, lags = 400)
    
    #%%
    # Decompose the series
    # with a period of 365 or 183 days (year or half_year)
    
    result = seasonal_decompose(train,  model = 'additive', period = season, extrapolate_trend='freq')

    #%%

    trend = result.trend
    seasonality = result.seasonal
    residuals = result.resid
    
    strength_seasonal = max(0, 1 - residuals.var()/(seasonality + residuals).var())
    print('The strength of seasonality for period {} is: {}'.format(season, strength_seasonal))
    
    plt.figure(figsize=(40, 20), dpi=80)
    plt.plot(trend)
    plt.figure(figsize=(40, 20), dpi=80)
    plt.plot(residuals)
    plt.figure(figsize=(40, 20), dpi=80)
Code example #31
File: test_seasonal.py Project: 5267/statsmodels
    def test_ndarray(self):
        res_add = seasonal_decompose(self.data.values, freq=4)
        seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                    -60.25, 62.46, 86.17, -88.38, -60.25, 62.46, 86.17,
                    -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                     62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38,
                    -60.25, 62.46, 86.17, -88.38, -60.25]
        trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75,
                 451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
                 316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
                 191.00, 166.88, 72.00, -9.25, -33.12, -36.75, 36.25,
                 103.00, 131.62, np.nan, np.nan]
        random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
                  -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
                  -33.246, -27.746, 46.165, -57.924, 28.004, -36.746,
                  -37.585, 151.826, -75.496, 86.254, -10.210, -194.049,
                  48.129, 11.004, -40.460, 143.201, np.nan, np.nan]
        assert_almost_equal(res_add.seasonal, seasonal, 2)
        assert_almost_equal(res_add.trend, trend, 2)
        assert_almost_equal(res_add.resid, random, 3)

        res_mult = seasonal_decompose(np.abs(self.data.values), 'm', freq=4)

        seasonal = [1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716,
                    0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538,
                    0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815,
                    1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931,
                    1.0815, 1.5538, 0.6716, 0.6931]
        trend = [np.nan, np.nan, 171.62, 204.00, 221.25, 245.12, 319.75,
                 451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
                 316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
                 191.00, 166.88, 107.25, 80.50, 79.12, 78.75, 116.50,
                 140.00, 157.38, np.nan, np.nan]
        random = [np.nan, np.nan, 1.29263, 1.51360, 1.03223, 0.62226,
                  1.04771, 1.05139, 1.20124, 0.84080, 1.28182, 1.28752,
                  1.08043, 0.77172, 0.91697, 0.96191, 1.36441, 0.72986,
                  1.01171, 0.73956, 1.03566, 1.44556, 0.02677, 1.31843,
                  0.49390, 1.14688, 1.45582, 0.16101, 0.82555, 1.47633,
                  np.nan, np.nan]

        assert_almost_equal(res_mult.seasonal, seasonal, 4)
        assert_almost_equal(res_mult.trend, trend, 2)
        assert_almost_equal(res_mult.resid, random, 4)

        # test odd
        res_add = seasonal_decompose(self.data.values[:-1], freq=4)
        seasonal = [68.18, 69.02, -82.66, -54.54, 68.18, 69.02, -82.66,
                    -54.54, 68.18, 69.02, -82.66, -54.54, 68.18, 69.02,
                    -82.66, -54.54, 68.18, 69.02, -82.66, -54.54, 68.18,
                    69.02, -82.66, -54.54, 68.18, 69.02, -82.66, -54.54,
                    68.18, 69.02, -82.66]
        trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75,
                 451.50, 561.12, 619.25, 615.62, 548.00, 462.12, 381.12,
                 316.62, 264.00, 228.38, 210.75, 188.38, 199.00, 207.12,
                 191.00, 166.88, 72.00, -9.25, -33.12, -36.75, 36.25,
                 103.00, np.nan, np.nan]
        random = [np.nan, np.nan, 72.538, 64.538, -42.426, -77.150,
                  -12.087, -67.962, 99.699, 120.725, -2.962, -4.462,
                  9.699, 6.850, -38.962, -33.462, 40.449, -40.775, 22.288,
                  -42.462, -43.301, 168.975, -81.212, 80.538, -15.926,
                  -176.900, 42.413, 5.288, -46.176, np.nan, np.nan]
        assert_almost_equal(res_add.seasonal, seasonal, 2)
        assert_almost_equal(res_add.trend, trend, 2)
        assert_almost_equal(res_add.resid, random, 3)
Code example #32
                           fs=FS,
                           nfft=256,
                           window=('tukey', 0.25),
                           detrend='constant',
                           nperseg=60,
                           noverlap=30,
                           scaling='density')
plt.pcolormesh(t, freq, Sxx)
plt.xlabel('Time [months]')
plt.ylabel('Frequency [1/month]')
plt.colorbar().set_label('Power spectral density [V^2 per 1/month]')
f.savefig(PATH_TO_PLOTS + '/spectrogram.pdf', bbox_inches='tight')
plt.show()

# signal decomposition = trend + seasonal + error
decomposition = seasonal_decompose(series_monthly, model="additive")
f = decomposition.plot()
f.savefig(PATH_TO_PLOTS + '/decomposition.pdf', bbox_inches='tight')
plt.show()

# split train-test
train = series_monthly.loc[series_monthly.index < SPLIT_DATE]
test = series_monthly.loc[series_monthly.index >= SPLIT_DATE]
print('Train size = {} %'.format(100 * len(train) / len(series_monthly)))
print('Test size = {} %'.format(100 * len(test) / len(series_monthly)))
f = plt.figure()
plt.plot(train)
plt.plot(test)
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Train/test split')
plt.xlabel('Date')
Code example #33
File: s_and_p.py Project: varun10221/ARIMA-model
#acf is the autocorrelation function and pacf is the partial acf (works only for a 1-d array)
#iloc is integer location, check pandas

lag_corr = acf (stock_data ['Logged First Difference'].iloc [1:])
lag_partial_corr = pacf (stock_data ['Logged First Difference'].iloc [1:])

#fig, ax = plt.subplots (figsize = (16,12))
#ax.plot (lag_corr)
#pylab.show ()

# To extract trends and seasonal patterns for TS analysis

from statsmodels.tsa.seasonal import seasonal_decompose

#set the frequency value right for monthly set freq = 30
decomposition = seasonal_decompose(stock_data['Natural Log'], model='additive', freq=30)  
#fig = decomposition.plot()  
#pylab.show ()

#lets fit some ARIMA, keep the AR indicator as 1 and the rest as zero, i.e. (p,d,q) = (1,0,0)
#the snippet below does it for undifferenced series

#model = sm.tsa.ARIMA (stock_data ['Natural Log'].iloc[1:], order = (1,0,0))
#result = model.fit (disp = -1)
#stock_data ['Forecast'] = result.fittedvalues
#stock_data [['Natural Log', 'Forecast']].plot (figsize = (16,12))
#pylab.show ()

#trying an exponential smoothing model
model = sm.tsa.ARIMA(stock_data['Logged First Difference'].iloc[1:], order=(0, 0, 1))  
results = model.fit(disp=-1)  
Code example #34
File: auto_arima.py Project: varunkotian/blog
from pyramid.arima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose

# Import data
data = pd.read_csv("data/industrial_production.csv", index_col=0)

# Formatting
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')

# Visualize
ax = data.plot()
fig = ax.get_figure()
fig.savefig("output/arima_raw_data_line_plot.png")

# Decomposition plot
result = seasonal_decompose(data, model='multiplicative')
fig = result.plot()
fig.savefig("output/seasonal_decompose_plot.png")

# Perform Seasonal ARIMA
stepwise_model = auto_arima(data,
                            start_p=1,
                            d=1,
                            start_q=1,
                            max_p=1,
                            max_d=1,
                            max_q=1,
                            start_P=1,
                            D=1,
                            start_Q=1,
                            max_P=1,
Code example #35
# multiplicative decompose a contrived multiplicative time series
from matplotlib import pyplot
from statsmodels.tsa.seasonal import seasonal_decompose
series = [i**2.0 for i in range(1,100)]
result = seasonal_decompose(series, model='multiplicative', freq=1)
result.plot()
pyplot.show()
Code example #36
#Moving average
movingAverage= indexedDataset_logScale.rolling(window=12).mean()
movingSTD= indexedDataset_logScale.rolling(window=12).std()
plt.plot(indexedDataset_logScale)
plt.plot(movingAverage,color='red')

datasetLogScaleMinusMovingAverage= indexedDataset_logScale - movingAverage
datasetLogScaleMinusMovingAverage.head(12)

#remove nan values
datasetLogScaleMinusMovingAverage.dropna(inplace=True)
datasetLogScaleMinusMovingAverage.head(10)

#Plotting trend,seasonal,residual error
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition= seasonal_decompose(indexedDataset_logScale,freq=1)

trend= decomposition.trend
seasonal= decomposition.seasonal
residual= decomposition.resid

plt.subplot(411)
plt.plot(indexedDataset_logScale,label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend,label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
Code example #37
File: 13_utility_us_index.py Project: nusyazdr/ml_3
naive_errors = get_cv_errors(utility_index_cv_splits, naive_predictions)
print("Naive errors:", naive_errors)
plot_cv_predictions(naive_predictions)

average_predictions = make_cv_predictions(utility_index_cv_splits,
                                          average_prediction)
average_errors = get_cv_errors(utility_index_cv_splits, average_predictions)
print("Average errors:", average_errors)
plot_cv_predictions(average_predictions)

sarima_order_kwargs = {"order": (1, 1, 1), "seasonal_order": (1, 1, 1, 12)}
sarima_predictions = make_cv_predictions(utility_index_cv_splits,
                                         sarima_prediction,
                                         **sarima_order_kwargs)
sarima_errors = get_cv_errors(utility_index_cv_splits, sarima_predictions)
print("SARIMA errors:", sarima_errors)
plot_cv_predictions(sarima_predictions)

sarima_extrapolation = sarima_prediction(utility_index_df, 80,
                                         **sarima_order_kwargs)
plt.plot(sarima_extrapolation.index, sarima_extrapolation["value"], color="g")

utility_index_additive_decomposition = statsmodels_seasonal.seasonal_decompose(
    utility_index_df, model="additive", period=12)
utility_index_additive_decomposition.plot()
utility_index_multiplicative_decomposition = statsmodels_seasonal.seasonal_decompose(
    utility_index_df, model="multiplicative", period=12)
utility_index_multiplicative_decomposition.plot()

plt.show()
Code example #38
from scipy import stats
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot

from statsmodels.tsa.stattools import adfuller

# function to calculate MAE, RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error

df = pd.read_csv('chapter3//TS.csv')
ts = pd.Series(list(df['Sales']),
               index=pd.to_datetime(df['Month'], format='%Y-%m'))

from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(ts)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

ts_log = np.log(ts)
ts_log.dropna(inplace=True)

s_test = adfuller(ts_log, autolag='AIC')
print("Log transform stationary check p value: ", s_test[1])

s_test = adfuller(ts, autolag='AIC')
# extract p value from test results
print("p value > 0.05 means data is non-stationary: ", s_test[1])
Code example #39
                                              subplot_spec=outer_grid[0],
                                              wspace=0.0,
                                              hspace=0.0)

ax = fig.add_subplot(inner_grid[0])

ax.axhline(y=0, c='black', lw=0.5)

baseline_mean = np.array([np.mean(b) for b in B])
baseline_mean_norm =  \
    (baseline_mean - np.min(baseline_mean))/ np.max(baseline_mean)

T = []
for i in range(len(C)):
    result_mul = seasonal_decompose(C[i],
                                    period=21,
                                    model='multiplicative',
                                    extrapolate_trend='freq')
    t = np.array(result_mul.trend)
    t = t - t[0]

    T.append(t)

    c = 'blue'
    alpha = 0.25
    if t[0] < t[-1]:
        c = 'red'
        alpha = 0.5

    ax.plot(range(len(t)), t, '-', lw=baseline_mean_norm[i], alpha=alpha, c=c)

T_mean = np.average(T, axis=0, weights=baseline_mean_norm)
Code example #40
def dftest(series):
    res = adfuller(series)
    p = res[1]
    return p


df = pd.read_csv('milk.csv')
df.columns = ['Month', 'Qty']
df.dropna(inplace=True)

df.set_index('Month', inplace=True)
df.index = pd.to_datetime(df.index)

from statsmodels.tsa.seasonal import seasonal_decompose
df_decomposed = seasonal_decompose(df['Qty'], model='multiplicative')
f = df_decomposed.plot()

df['First Difference'] = df['Qty'] - df['Qty'].shift(1)
df.dropna(inplace=True)

df['Seasonal FD'] = df['First Difference'] - df['First Difference'].shift(12)
df.dropna(inplace=True)
plt.plot(df['Seasonal FD'])
print(dftest(df['Seasonal FD']))

from statsmodels.tsa.stattools import acf, pacf
acfgraph = acf(df['Seasonal FD'], nlags=5)
pacfgraph = pacf(df['Seasonal FD'], nlags=5)

plt.plot(acfgraph)
Code example #41
                                    adjust=True).mean()
df_log_exp_decay = df_log - rolling_mean_exp_decay
df_log_exp_decay.dropna(inplace=True)

get_stationarity(df_log_exp_decay)

# In[69]:

df_log_shift = df_log - df_log.shift()
df_log_shift.dropna(inplace=True)

get_stationarity(df_log_shift)

# In[74]:

decomposition = seasonal_decompose(df_log, freq=100)
model = ARIMA(df_log, order=(2, 1, 2))
results = model.fit(disp=-1)
plt.plot(df_log_shift)
plt.plot(results.fittedvalues, color='red')

# In[78]:

predictions_ARIMA_diff = pd.Series(results.fittedvalues, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
predictions_ARIMA_log = pd.Series(df_log.iloc[0], index=df_log.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(
    predictions_ARIMA_diff_cumsum, fill_value=0)
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(df['Price'])
plt.plot(predictions_ARIMA)
Code example #42
# two_sided=True, extrapolate_trend=0) is the statsmodels library
# implementation of the naive, or classical, decomposition method.
# It breaks down a time series into 4 graphs, observed (or original), trend
# (whether the graph tends to go up or down), seasonal (repeating short term
# cycles), and residual (or noise).
# model can either be 'additive' or 'multiplicative'. Both will give a result,
# so to determine which one to use look at a df.plot() of the observed values.
# If the magnitude of the seasonal variations appear to increase over time,
# it is multiplicative. If they stay the same, it is additive. It is possible
# to transform data into being additive by using sqrt for quadratic trend or
# ln for exponential trend. In practice I fail to see the difference in result.
# Furthermore, more advanced decomposition methods are advised over this one.
# Later versions of statsmodels include STL decomposition under:
# from statsmodels.tsa.seasonal import STL
# There is also the facebook prophet package.
decompose = seasonal_decompose(data['DOW'], model='additive')
# You may plot the 4 graphs individually by using:
# decompose.observed.plot()
# decompose.trend.plot()
# decompose.seasonal.plot()
# decompose.resid.plot()
decompose.plot()
plt.show()
plt.close()
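
# The comment block above mentions STL as a more advanced alternative shipped with newer
# statsmodels; a minimal self-contained sketch (the toy series and period=12 are assumptions
# for illustration, not the script's data['DOW'] column):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

idx = pd.date_range('2018-01-01', periods=60, freq='MS')
toy = pd.Series(np.linspace(10, 20, 60) + np.sin(np.arange(60) * 2 * np.pi / 12), index=idx)
stl_result = STL(toy, period=12).fit()
stl_result.plot()   # same four panels; components via stl_result.trend / .seasonal / .resid
plt.show()
plt.close()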

# We can tell a number of things from the seasonal_decompose plots. An upward
# trend means the data is not stationary, meaning the mean and variance are not
# constant over time. Stationarity is needed for ARIMA models: having a constant
# expected value unaffected by trend makes the series easier to model. We can confirm
# stationarity using the Augmented Dickey-Fuller test.
# adfuller(x, maxlag=None, regression='c', autolag='AIC', store=False,
Code example #43
decomp = decomp.set_index(pd.DatetimeIndex(decomp['Date']))



# In[65]:

#interpolate missing values
#decomp['LogSales'].interpolate(inplace=True)


# In[75]:

#decompose time series
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(decomp['LogSales'], model='additive', freq=12)  
decomposition2 = seasonal_decompose(decomp['Customers'], model='additive', freq=12)


# In[37]:

#%pylab inline
#fig = plt.figure()  
#fig = decomposition.plot()  


# In[ ]:



Code example #44
samples = SequenceDinucProperties(npath, ppath)

X = samples.getX()
print(X.shape)
print('>>>')
tmp = X[:, 0, 30, 0]
print(tmp.shape)
print(tmp)
plt.hist(tmp)

sample = X[7, :, :, :]
sample = sample.reshape(sample.shape[2], sample.shape[1])
print(sample.shape)
print(sample)

diff = list()
for i in range(1, sample.shape[1]):
    val = sample[1, i] - sample[1, i - 1]
    diff.append(val)

plt.plot(sample[1, :])
plt.plot(diff)

result = seasonal_decompose(sample[1, :], model='additive', freq=1)
#print(result.trend)
#print(result.seasonal)
#print(result.resid)
#print(result.observed)

result.plot()
print('=' * 10)
Code example #45
File: diff.py Project: aarora79/sitapt
print(x)


#seasonal decompose to extract seasonal trends
X = []
y = []
labels = []
line_width = []
dash_type = []
color_palette = [ 'Pink', 'Red', 'Orange', 'Yellow', 'Brown', 'Green', 'Cyan', 'Blue', 'Purple', 'Black']
c = []
for i in range(len(df2)):

    if df2['name'][i] == 'https' or df2['name'][i] == '0-100':
        continue
    decomposition = seasonal_decompose(np.array(df[df2['name'][i]]), model='additive', freq=5)  
    X.append(date_series)
    y.append(decomposition.trend)
    labels.append(df2['name'][i])
    line_width.append(2)
    dash_type.append(None)
    c.append(color_palette[i % len(color_palette)])

_draw_multiple_line_plot('growth_trends.html', 
                         'growth_trend', 
                         X,
                         y,
                         c, 
                         labels,
                         dash_type,
                         line_width,
Code example #46
    result = [series[0]]  # first value is same as series
    for n in range(1, len(series)):
        result.append((series[:n + 1].mean()))
    return result


df['cumulative'] = cumulative(df['sales'])
df['cum2'] = df['sales'].mean()
df['moving_average'] = df['sales'].rolling(window=10).mean()
plt.style.use('fivethirtyeight')
ax = df.plot(figsize=(18, 6), fontsize=14)
plt.title("原始数据趋势图")
plt.show()

rcParams['figure.figsize'] = 18, 6
result_a = seasonal_decompose(data, model='additive')
fig = result_a.plot()
plt.title("累加效果图")
plt.show()

rcParams['figure.figsize'] = 18, 6
result_m = seasonal_decompose(data, model='multiplicative')
fig = result_m.plot()
plt.title("累乘效果图")
plt.show()

################################################################
#Simple Exponential Smoothing (level)
'''
x_t = a + e_t                                          (model)
x_hat_{t,t+1} = alpha*x_t + (1 - alpha)*x_hat_{t-1,t}  (forecast)
Code example #47
bus.set_index(['index'], inplace=True)
bus.index.name=None

len(date_list)  # check
len(bus.index)   # check

# riders
bus.columns= ['riders']
# df['riders'] = df.riders.apply(lambda x: int(x)*100)
bus['riders'] = bus.riders.apply(lambda x: int(x))
bus.riders

bus.riders.plot(figsize=(12,8), title= 'Monthly Ridership (100,000s)', fontsize=14)
# plt.savefig('month_ridership.png', bbox_inches='tight')   # optional save

decomposition = seasonal_decompose(bus.riders, freq=12)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(15, 8)
# plt.savefig('seasonal.png', bbox_inches='tight')   # optional save

# grab just one graphic doing the following:
seasonal = decomposition.seasonal 
seasonal.plot()

# define Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):

    #Determining rolling statistics
    rolmean = pd.rolling_mean(timeseries, window=12)
Code example #48
# ## Decomposing

# Both trend and seasonality are modelled separately and the remaining part of the series is returned. For more details watch these videos: <br/>
#
# Seasonal Decomposition and Forecasting:
#
# 1. https://www.youtube.com/watch?v=85XU1T9DIps (Part I)
# 2. https://www.youtube.com/watch?v=CfB9ROwF2ew (Part II)

# In[32]:

from statsmodels.tsa.seasonal import seasonal_decompose

# In[33]:

decomposition = seasonal_decompose(ts_log, freq=700)

# In[34]:

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# In[35]:

plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')

# In[36]:
コード例 #49
0
#my_plot = port_df_mean['attributes_bytes'].plot.hist()
my_plot = calc_ent2['entropy'].plot.hist()


# In[25]:

import matplotlib.pyplot as pplt
#my_plot=pplt.plot(port_df_mean['attributes_bytes'])
my_plot=pplt.plot(calc_ent2['entropy'])
pplt.autoscale(enable=True, axis='x', tight=None)
pplt.show()


# In[26]:

decomposition = seasonal_decompose(calc_ent2.entropy.values, freq=24)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(15, 8)


# In[44]:

model = ARIMA(calc_ent2['entropy'], order=(1, 0, 0))    ## The endogenous variable needs to be type float or you get a cast error
model_fit = model.fit()       # fit is a Function
model_fitted = model_fit.fittedvalues    # fittedvalues is a Series
print(model_fit.summary())
print(model_fitted)
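# Hedged aside (not in the original notebook): per the cast-error note above, the
# endogenous column can be cast to float explicitly up front, e.g.:
calc_ent2['entropy'] = calc_ent2['entropy'].astype(float)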


# In[29]:
コード例 #50
0
airline.plot(figsize=(10, 8))

airline['EWMA-12'] = airline['#Passengers'].ewm(span=12).mean()

airline[["#Passengers", "EWMA-12"]].plot()

##ETS (Error-Trend-Seasonality) MODELS
##Exponential Smoothing
##Trend Methods Models
##ETS Decomposition
airline.plot()

from statsmodels.tsa.seasonal import seasonal_decompose

result = seasonal_decompose(airline['#Passengers'], model='multiplicative')
result.seasonal.plot()
result.trend.plot()

result.plot()

##ARIMA MODELS
##Step 1
df = pd.read_csv('monthly-milk-production.csv')
df.head()

df.columns = ["Month", "Milk in pounds per cow"]
df.head()

df.tail()
##to drop a row
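# Hedged sketch (illustrative): assuming the file ends with a trailing footer/NaN row,
# dropping the last row by label could look like this.
df = df.drop(df.index[-1])
df.tail()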
コード例 #51
0
def diagnostics():
    decomposition = seasonal_decompose(view_hour['distinct_freq_sum'].values,freq=24 )  
      
    fig = decomposition.plot()  
    fig.set_size_inches(50, 8)
コード例 #52
0
def diagnostics():
    decomposition = seasonal_decompose(voltage_df['rel_counts'].values,freq=24 )  
      
    fig = decomposition.plot()  
    fig.set_size_inches(50, 8)
コード例 #53
0
ファイル: test_seasonal.py プロジェクト: 5267/statsmodels
    def test_one_sided_moving_average_in_stl_decompose(self):
        res_add = seasonal_decompose(self.data.values, freq=4, two_sided=False)

        seasonal = np.array([76.76, 90.03, -114.4, -52.4, 76.76, 90.03, -114.4,
                             -52.4, 76.76, 90.03, -114.4, -52.4, 76.76, 90.03,
                             -114.4, -52.4, 76.76, 90.03, -114.4, -52.4, 76.76,
                             90.03, -114.4, -52.4, 76.76, 90.03, -114.4, -52.4,
                             76.76, 90.03, -114.4, -52.4])

        trend = np.array([np.nan, np.nan, np.nan, np.nan, 159.12, 204., 221.25,
                          245.12, 319.75, 451.5, 561.12, 619.25, 615.62, 548.,
                          462.12, 381.12, 316.62, 264., 228.38, 210.75, 188.38,
                          199., 207.12, 191., 166.88, 72., -9.25, -33.12,
                          -36.75, 36.25, 103., 131.62])

        resid = np.array([np.nan, np.nan, np.nan, np.nan, 11.112, -57.031,
                          118.147, 136.272, 332.487, 267.469, 83.272, -77.853,
                          -152.388, -181.031, -152.728, -152.728, -56.388, -115.031,
                          14.022, -56.353, -33.138, 139.969, -89.728, -40.603,
                          -200.638, -303.031, 46.647, 72.522, 84.987, 234.719,
                          -33.603, 104.772])

        assert_almost_equal(res_add.seasonal, seasonal, 2)
        assert_almost_equal(res_add.trend, trend, 2)
        assert_almost_equal(res_add.resid, resid, 3)

        res_mult = seasonal_decompose(np.abs(self.data.values), 'm', freq=4, two_sided=False)

        seasonal = np.array([1.1985, 1.5449, 0.5811, 0.6755, 1.1985, 1.5449, 0.5811,
                             0.6755, 1.1985, 1.5449, 0.5811, 0.6755, 1.1985, 1.5449,
                             0.5811, 0.6755, 1.1985, 1.5449, 0.5811, 0.6755, 1.1985,
                             1.5449, 0.5811, 0.6755, 1.1985, 1.5449, 0.5811, 0.6755,
                             1.1985, 1.5449, 0.5811, 0.6755])

        trend = np.array([np.nan, np.nan, np.nan, np.nan, 171.625, 204.,
                          221.25, 245.125, 319.75, 451.5, 561.125, 619.25,
                          615.625, 548., 462.125, 381.125, 316.625, 264.,
                          228.375, 210.75, 188.375, 199., 207.125, 191.,
                          166.875, 107.25, 80.5, 79.125, 78.75, 116.5,
                          140., 157.375])

        resid = np.array([np.nan, np.nan, np.nan, np.nan, 1.2008, 0.752, 1.75,
                          1.987, 1.9023, 1.1598, 1.6253, 1.169, 0.7319, 0.5398,
                          0.7261, 0.6837, 0.888, 0.586, 0.9645, 0.7165, 1.0276,
                          1.3954, 0.0249, 0.7596, 0.215, 0.851, 1.646, 0.2432,
                          1.3244, 2.0058, 0.5531, 1.7309])

        assert_almost_equal(res_mult.seasonal, seasonal, 4)
        assert_almost_equal(res_mult.trend, trend, 2)
        assert_almost_equal(res_mult.resid, resid, 4)

        # test odd
        res_add = seasonal_decompose(self.data.values[:-1], freq=4, two_sided=False)
        seasonal = np.array([81.21, 94.48, -109.95, -65.74, 81.21, 94.48, -109.95,
                             -65.74, 81.21, 94.48, -109.95, -65.74, 81.21, 94.48,
                             -109.95, -65.74, 81.21, 94.48, -109.95, -65.74, 81.21,
                             94.48, -109.95, -65.74, 81.21, 94.48, -109.95, -65.74,
                             81.21, 94.48, -109.95])

        trend = [np.nan, np.nan, np.nan, np.nan, 159.12, 204., 221.25,
                 245.12, 319.75, 451.5, 561.12, 619.25, 615.62, 548.,
                 462.12, 381.12, 316.62, 264., 228.38, 210.75, 188.38,
                 199., 207.12, 191., 166.88, 72., -9.25, -33.12,
                 -36.75, 36.25, 103.]

        random = [np.nan, np.nan, np.nan, np.nan, 6.663, -61.48,
                  113.699, 149.618, 328.038, 263.02, 78.824, -64.507,
                  -156.837, -185.48, -157.176, -139.382, -60.837, -119.48,
                  9.574, -43.007, -37.587, 135.52, -94.176, -27.257,
                  -205.087, -307.48, 42.199, 85.868, 80.538, 230.27, -38.051]

        assert_almost_equal(res_add.seasonal, seasonal, 2)
        assert_almost_equal(res_add.trend, trend, 2)
        assert_almost_equal(res_add.resid, random, 3)
コード例 #54
0
    return dt
#
dataset = read_csv('Load/Task 1/L1-train.csv')

dataset = read_csv('train.csv')


data = dataset['w1']
#plt.plot(data)
#plt.show()
start = datetime.strptime("Jan 1 2001  1:00AM", "%b %d %Y %I:%M%p")
end = datetime.strptime("Oct 1 2010  12:00AM", "%b %d %Y %I:%M%p")
data.index = pd.date_range(start=start, end=end, freq='h')   # hourly index over the training period
data.to_csv('train_w1.csv')

decomposition = seasonal_decompose(data[:10*27], model="additive")

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

trend.plot()
seasonal.plot()
residual.plot()

# moving-average plot
def draw_trend(timeSeries, size):
    f = plt.figure(facecolor='white')
    # simple moving average over `size` observations
    rol_mean = timeSeries.rolling(window=size).mean()
    # weighted moving average over `size` observations
コード例 #55
0
# fontsize is just for the axes size
unq_rel_cnts1['distinct_freq'].loc[:].plot(figsize=(40,8), fontsize=30)  


# #### Execute some Univariate Statistics

# In[17]:


unq_rel_cnts1['distinct_freq'].describe()


# In[18]:


decomposition = seasonal_decompose(unq_rel_cnts1['distinct_freq'].values,freq=24 )  
  
fig = decomposition.plot()  
fig.set_size_inches(15, 8)


# In[19]:


# Graph Autocorrelation and Partial Autocorrelation data
fig, axes = plt.subplots(1, 2, figsize=(15,4))

fig = sm.graphics.tsa.plot_acf(unq_rel_cnts1['distinct_freq'], lags=12, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(unq_rel_cnts1['distinct_freq'], lags=12, ax=axes[1])

コード例 #56
0
df = pd.DataFrame(df.price)

df.asfreq('M') # takes the last calendar day of each month and uses its value; if there is none (e.g. a weekend) it fills NaN, so it is better to use the following:
df = df.resample('M').last() 

roll = 12

df['rolling_mean'] = df['price'].rolling(roll).mean()
df['rolling_std'] = df['price'].rolling(roll).std()
df.plot(title = 'gold price')

# non-linear growth pattern can be observed -> use the multiplicative model

from statsmodels.tsa.seasonal import seasonal_decompose

decomposition = seasonal_decompose(df.price, model = 'multiplicative')
# seasonal_decompose?   # IPython help lookup (use help(seasonal_decompose) in a plain script)

decomposition.plot()
dir(decomposition)
decomposition.resid.plot()
decomposition.trend.plot()
decomposition.seasonal.plot()

#%% Testing for stationarity in time series

import pandas as pd
from statsmodels.tsa.stattools import adfuller
import yfinance as yf
import statsmodels.tsa.api as smt
import matplotlib.pyplot as plt
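# Hedged quick check (illustrative, not part of the original snippet): run the ADF
# test on the monthly gold price loaded above; a p-value above 0.05 means the null
# of a unit root (non-stationarity) cannot be rejected.
adf_stat, p_value, *rest = adfuller(df['price'].dropna())
print('ADF statistic: %.3f, p-value: %.3f' % (adf_stat, p_value))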
コード例 #57
0
ファイル: basic_detectors.py プロジェクト: morfast/opprentice
def TSD(x):
    result = seasonal_decompose(x, model='additive', freq=1440)
    return result.trend, result.seasonal, result.resid
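# Hedged usage sketch (not from the original project): use the decomposition residual
# as a simple anomaly score, flagging points far from the residual mean. `x` is assumed
# to be a 1-D minute-level array (freq=1440 = one day of minutes).
import numpy as np

trend, seasonal, resid = TSD(x)
threshold = 3 * np.nanstd(resid)
anomaly_idx = np.where(np.abs(resid - np.nanmean(resid)) > threshold)[0]
print('candidate anomalies at indices:', anomaly_idx)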
コード例 #58
0
ts_log_diff = ts_log - ts_log.shift()
ts_log_diff.dropna(inplace=True)

plt.plot(ts_log_diff)
#%%
test_stationarity(ts_log_diff)
'''
It is stationary because:
• the rolling mean and rolling std show only small variations over time.
• the test statistic is below the 10% critical value, so we can be 90 percent confident that the series is stationary.
'''
#%%

# Decomposing
decomp = seasonal_decompose(ts_log)
trend = decomp.trend
seasonal = decomp.seasonal
residual = decomp.resid

plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonal')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residual')
コード例 #59
0
Depending on the nature of the trend and seasonality, a time series can be modeled as additive or multiplicative,
wherein each observation in the series is expressed as either a sum or a product of the components:

Additive time series:
Value = Base Level + Trend + Seasonality + Error

Multiplicative time series:
Value = Base Level x Trend x Seasonality x Error

If you look closely at the residuals of the multiplicative decomposition, some pattern is left over. The additive
decomposition, however, looks quite random, which is good. So ideally, the additive decomposition should be
preferred for this particular series.
"""

result_add = seasonal_decompose(df['PASSENGER_SUM_DAY'],
                                model='additive',
                                extrapolate_trend='freq')
result_mul = seasonal_decompose(df['PASSENGER_SUM_DAY'],
                                model='multiplicative',
                                extrapolate_trend='freq')
result_mul.plot().suptitle('Multiplicative Decompose', fontsize=22)
plot.show()
plot.close()
result_add.plot().suptitle('Additive Decompose', fontsize=22)
plot.show()
plot.close()
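# Quick sanity check (hedged, not in the original script; numpy assumed imported as np):
# with the additive model the components should sum back to the observed series, and
# extrapolate_trend='freq' removes the NaN edges, so the comparison holds everywhere.
reconstructed = result_add.trend + result_add.seasonal + result_add.resid
print(np.allclose(reconstructed, result_add.observed))   # expected: True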

# DESEASON THE VARIABLE
# A seasonal pattern every 7 days is clearly visible
df['PASSENGER_SUM_DAY'] = np.log(df['PASSENGER_SUM_DAY']) - np.log(
    df['PASSENGER_SUM_DAY']).shift(7)
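# Hedged alternative (illustrative only): rather than the 7-lag log difference above,
# the series can be deseasonalised directly with the decomposition's seasonal component;
# result_mul.observed still holds the original values.
deseasoned = result_mul.observed / result_mul.seasonal       # multiplicative model
# additive counterpart: result_add.observed - result_add.seasonal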
コード例 #60
0
ファイル: visualize.py プロジェクト: aarora79/sitapt

print('lag_partial_correlations')
print(lag_partial_correlations)
y = lag_partial_correlations
_draw_multiple_line_plot('lag_partial_correlations.html', 
                         'lag_partial_correlations', 
                         [X],
                         [y],
                         ['navy'], 
                         ['lag_partial_correlations'],
                         [None],
                         [1],
                         'datetime', 'Date', 'lag_partial_correlations', y_start=-1, y_end=1)

decomposition = seasonal_decompose(np.array(df['https']), model='additive', freq=30)  
_draw_decomposition_plot('decomposition.html', X, decomposition, 'seasonal decomposition', 'datetime', 'decomposition', width=600, height=400)

model = sm.tsa.ARIMA(np.array(df['https'].iloc[1:]), order=(2,0,0))  
results = model.fit(disp=-1)  

#predict next 10 values
num_predictions = 12
predicted_dates = []
last_date = X[-1]
for i in range(num_predictions):
    next_date = last_date + 30
    predicted_dates.append(next_date)
    last_date = next_date

#predicted_dates=np.array(['2015-10-17', '2015-12-19', '2016-03-19', '2016-06-19', '2016-09-19'], dtype=np.datetime64)