Ejemplo n.º 1
0
def autocorrelation_graph(X):
    """Show the autocorrelation plot of a series in its own 10x5 figure.

    Args:
        X: the pandas Series handed to ``autocorrelation_plot``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    figure_size = (10, 5)
    plt.figure(figsize=figure_size)
    autocorrelation_plot(X)
    plt.show()
Ejemplo n.º 2
0
    def plot_autocorrelation(self, ax=None):
        """
        Draw the autocorrelation of every power series on a single axes.

        Reference:
        http://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm

        Parameters
        ----------
        ax : matplotlib axes, optional
            Axes to draw on; when omitted, the current axes
            (``plt.gca()``) is used.

        Returns
        -------
        The axes that was drawn on.
        """
        target = plt.gca() if ax is None else ax
        for series in self.power_series():
            autocorrelation_plot(series, ax=target)
        return target
Ejemplo n.º 3
0
    def test_autocorrelation_plot(self):
        """Smoke-test ``autocorrelation_plot`` on a Series and on its raw
        values, then check that a ``label`` keyword reaches the legend."""
        from pandas.plotting import autocorrelation_plot

        # Same check for the pandas object and for its underlying values.
        for candidate in (self.ts, self.ts.values):
            _check_plot_works(autocorrelation_plot, series=candidate)

        labelled_axes = autocorrelation_plot(self.ts, label='Test')
        self._check_legend_labels(labelled_axes, labels=['Test'])
Ejemplo n.º 4
0
# Lay out a 2x2 grid of axes on the existing figure `fig`.
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)

# Top-left panel: the `single.adj_nav` series.
single.adj_nav.plot(
    ax=ax1,
    sharex=True,
)  #ylim = (0, 14)
# Top-right panel: table of `single.rate.describe()` rounded to 2 decimals,
# with a KDE and a histogram of the same series drawn on the same axes.
# Fix: the alignment keywords were spelled 'centel'; matplotlib only accepts
# 'left', 'center' or 'right' there.
pplt.table(ax=ax2,
           data=np.round(single.rate.describe().T, 2),
           loc='upper right',
           colWidths=[0.2, 0.2, 0.2],
           edges='open',
           cellLoc='center',
           colLoc='center',
           rowLoc='center',
           in_layout=False)
single.rate.plot.kde(ax=ax2, sharex=True)
# The histogram uses a secondary y axis so its counts do not squash the KDE.
single.rate.plot.hist(ax=ax2,
                      sharex=True,
                      secondary_y=True,
                      alpha=0.5,
                      bins=50)
# Bottom-left: the rate series itself; bottom-right: its autocorrelation.
single.rate.plot(ax=ax3)
pplt.autocorrelation_plot(single.rate, ax=ax4)

plt.show()

# NOTE(review): `single` is used above as an object with `.adj_nav` / `.rate`
# attributes but is called here like a report factory -- confirm this is
# intended and not a leftover from another script.
profile = single(title='Titanic Dataset')
profile.to_file(output_file='result/titanic_report.html')
Ejemplo n.º 5
0
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# In[32]:

# Autocorrelation of the "Milk First Difference" column, NaNs removed first.
fig_first = plot_acf(df["Milk First Difference"].dropna())

# In[33]:

# Same plot for the "Seasonal First Difference" column.
fig_seasonal_first = plot_acf(df["Seasonal First Difference"].dropna())

# In[34]:

from pandas.plotting import autocorrelation_plot

# pandas' own autocorrelation plot of the same column.
autocorrelation_plot(df['Seasonal First Difference'].dropna())

# In[35]:

# Partial autocorrelation of the same column.
result = plot_pacf(df["Seasonal First Difference"].dropna())

# In[36]:

# Two stacked panels on one 12x8 figure; the ACF (40 lags) goes in the top one.
# .iloc[13:] skips the first 13 rows -- presumably the NaNs left by the
# differencing; TODO confirm against how the column was built.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(df['Seasonal First Difference'].iloc[13:],
                               lags=40,
                               ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(df['Seasonal First Difference'].iloc[13:],
                                lags=40,
df.Server.value_counts()[:25]
#
#ds.to_csv("data/VirtualOneServer.csv")
#ds1.to_csv("data/VirtualFormattedonseXerver.csv")
#
#df.to_csv("data/virtualMemoryData.csv")

#plt.plot(df[["Time","MemoryUsed"]])
#plt.show()

from pandas.plotting import autocorrelation_plot
from matplotlib.pyplot import figure

# Open a large figure (20x12 in, 80 dpi, white face, black edge).
figure(num=None, figsize=(20, 12), dpi=80, facecolor='w', edgecolor='k')

# Autocorrelation of the "MemoryUsed" column of `ds`.
autocorrelation_plot(ds["MemoryUsed"])
plt.show()

# Second figure with the same settings, this time plotting `y_test`.
figure(num=None, figsize=(20, 12), dpi=80, facecolor='w', edgecolor='k')

plt.plot(y_test)
plt.show()

rmse_val = []
for K in range(100):
    K = K + 1
    model = KNeighborsRegressor(n_neighbors=K)

    model.fit(X_train, y_train)  #fit the model
    pred = model.predict(X_test)  #make prediction on test set
    error = np.sqrt(mse(y_test, pred))  #calculate rmse
Ejemplo n.º 7
0
      ,[Close]
  FROM [NSEData].[dbo].[IndexHistory]
  where [Index Name] = 'NIFTY 50'
'''
# Run the query held in `sql` and load the result into a DataFrame.
df = pd.read_sql_query(sql, engine)

# Use the parsed 'Date' column as the index, then drop the column itself.
df.index = df['Date'].apply(pd.to_datetime)
del df['Date']

# Fill missing values from the next valid observation (back-fill).
df = df.fillna(df.bfill())
# NOTE: from here on `df` is a Series -- the month-start ('MS') mean of 'Close'.
df = df['Close'].resample('MS').mean()

df.plot()
plt.show()

autocorrelation_plot(df)
plt.show()

# Chronological split: the first 66% of the values train, the rest test.
quantity = df.values
size = int(len(quantity) * 0.66)
train, test = quantity[0:size], quantity[size:len(quantity)]
# `history` starts as a list copy of the training values.
history = [x for x in train]
predictions = list()

for t in range(len(test)):
    model = ARIMA(history, order=(5 ,2 ,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat[0])
    obs = test[t]
# Load the hourly test file; 'datetime' is parsed with the custom `dateparse`.
df_test = pd.read_csv(
    'ElectricDemandForecasting-DL-master_data_hourly_20140102_20191101_test.csv',
    parse_dates=['datetime'],
    date_parser=dateparse,
    index_col=1)

# convert to datetime
# NOTE(review): `df_train` is not defined in this excerpt, and the lines below
# need 'datetime' to still be a regular column after index_col=1 -- confirm.
df_test['datetime'] = pd.to_datetime(df_test['datetime'], utc=True)
df_train['datetime'] = pd.to_datetime(df_train['datetime'], utc=True)

# Index both frames by their (now UTC) timestamps.
df_test = df_test.set_index('datetime')
df_train = df_train.set_index('datetime')

from pandas.plotting import autocorrelation_plot

autocorrelation_plot(df_test)
pyplot.show()

from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA

# Fit ARIMA(1, 1, 0) on the test frame; disp=0 is passed to fit().
model = ARIMA(df_test, order=(1, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
# Kernel-density estimate of the residuals, then their summary statistics.
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())
Ejemplo n.º 9
0
# Show the series shifted down by one row.
print(df['electricity_available'].shift(1))

# Seasonal difference: each value minus the value 12 rows earlier.
df['Seasonal First Difference'] = df['electricity_available'] - df[
    'electricity_available'].shift(12)

print(df.head(14))

## Again test Dickey Fuller test
adfuller_test(df['Seasonal First Difference'].dropna())

#plt.plot(df['Seasonal First Difference'])
#plt.show()

# Correlations
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['electricity_available'])
#plt.show()

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# ACF (top panel) and PACF (bottom panel), 40 lags each, on one 12x8 figure.
# .iloc[13:] skips the first 13 rows; the 12-row shift above leaves the first
# 12 of them NaN.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = plot_acf(df['Seasonal First Difference'].iloc[13:], lags=40, ax=ax1)

#plt.show()

ax2 = fig.add_subplot(212)
fig = plot_pacf(df['Seasonal First Difference'].iloc[13:], lags=40, ax=ax2)

#plt.show()
Ejemplo n.º 10
0
# Plot the current series with the driving signal overlaid as a black line.
plot_series(time, series)
plt.plot(time, signal, "k-")
plt.show()

# Regenerate the impulse signal (seed 42) and derive a new series from it.
# NOTE(review): `impulses` and `autocorrelation` are defined outside this
# excerpt; the dict presumably maps lag -> coefficient -- confirm.
signal = impulses(time, 10, seed=42)
series = autocorrelation(signal, {1: 0.70, 50: 0.2})
plot_series(time, series)
plt.plot(time, signal, "k-")
plt.show()

# Difference between consecutive elements (one element shorter, hence
# time[1:]) -- assumes `series` is a NumPy array; TODO confirm.
series_diff1 = series[1:] - series[:-1]
plot_series(time[1:], series_diff1)

from pandas.plotting import autocorrelation_plot

autocorrelation_plot(series)

from statsmodels.tsa.arima_model import ARIMA

# Fit ARIMA(5, 1, 0) on the series and print the fit summary.
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

import pandas as pd
# NOTE: `series` is rebound below to the sunspot column read from the CSV.
df = pd.read_csv("sunspots.csv", parse_dates=["Date"], index_col="Date")
series = df["Monthly Mean Total Sunspot Number"].asfreq("1M")
series.head()

series.plot(figsize=(12, 5))

# Same series restricted to dates from 1995-01-01 onwards.
series["1995-01-01":].plot()
Ejemplo n.º 11
0
# Plot the seasonal difference and run `adf_check` on it (NaNs dropped).
df['seasonal diff'].plot()
adf_check(df['seasonal diff'].dropna())

# Seasonal first difference: the first difference minus itself 12 rows earlier.
df['seasonal first diff'] = df['first diff'] - df['first diff'].shift(12)
df['seasonal first diff'].plot()
adf_check(df['seasonal first diff'].dropna())

# Autocorrelation plots
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
fig_first = plot_acf(df['first diff'].dropna())
fig_seasonal_first = plot_acf(df['seasonal first diff'].dropna())

# Can also be done in pandas
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['seasonal first diff'].dropna())

# Partial autocorrelation plots
result = plot_pacf(df['seasonal first diff'].dropna())

# NOTE: these two calls repeat plots already produced above.
plot_acf(df['seasonal first diff'].dropna())
plot_pacf(df['seasonal first diff'].dropna())

# Actually building the ARIMA model
from statsmodels.tsa.arima_model import ARIMA
# Seasonal model: SARIMAX on the 'pounds' column with order (0,1,0) and
# seasonal order (1,1,1,12). The ARIMA import above is not used in this span.
model = sm.tsa.statespace.SARIMAX(df['pounds'], order=(0,1,0), seasonal_order=(1,1,1,12))
results = model.fit()
print(results.summary())
# Residuals of the fit: shown as a value, then plotted.
results.resid
results.resid.plot()
Ejemplo n.º 12
0
def main():
    """Run the AAPL exploration and auto-ARIMA forecast end to end.

    Steps (every plot is displayed with ``plt.show()``):
      1. load 'aapl.us.txt' and plot it;
      2. run ``test_stationarity`` on the 'open' column before and after
         differencing;
      3. plot the autocorrelation of the raw data, of the differenced data and
         of a uniform random array of the same length for comparison;
      4. fit ``auto_arima`` on 2007-2016, forecast as many periods as there
         are test rows (2017 onwards), and plot the forecast, the actual data
         and the monthly root-mean-square error.

    Relies on ``load_data``, ``test_stationarity``, ``apply_differencing`` and
    ``auto_arima`` being available at module level. Returns nothing.
    """
    # print() with one parenthesised argument behaves the same on Python 2
    # and 3; the original print statements were a syntax error on Python 3.
    print('loading data')
    data = load_data('aapl.us.txt')
    data.plot()
    plt.title('data before differencing')
    plt.show()

    test_stationarity(data['open'])

    first_difference = apply_differencing(data)

    test_stationarity(first_difference['open'])

    first_difference.plot()
    plt.title('data after differencing')
    plt.show()
    autocorrelation_plot(data)
    plt.title('autocorrelation of data')
    plt.show()
    autocorrelation_plot(first_difference)
    plt.title('autocorrelation of data after differencing')
    plt.show()

    # Reference case: uniform noise with as many rows as the differenced data.
    random_array = pd.DataFrame(np.random.rand(1, len(
        first_difference.index))).transpose()
    autocorrelation_plot(random_array)
    plt.title('autocorrelation of random number array')
    plt.show()

    print('fitting arima model')

    # result = seasonal_decompose(data, model='multiplicative', freq = 6)

    # result.plot()
    # plt.show()

    # print('residuals 5 number summary')
    #
    # print(result.resid.describe())

    # Chronological split by date label.
    train = data.loc['2007-01-01':'2016-12-31']
    test = data.loc['2017-01-01':]

    # NOTE(review): seasonal=False is combined with seasonal settings
    # (m=365, D=1, start_P=0) -- confirm which of the two is intended.
    stepwise_model = auto_arima(train,
                                start_p=10,
                                start_q=0,
                                max_p=1000,
                                max_q=1000,
                                max_d=1000,
                                max_order=1000,
                                m=365,
                                start_P=0,
                                seasonal=False,
                                d=1,
                                D=1,
                                trace=True,
                                suppress_warnings=True,
                                stepwise=True)

    print('final arima order \n \n')
    print(stepwise_model.order)
    stepwise_model.fit(train)
    future_forecast = stepwise_model.predict(n_periods=len(test['open']))

    # print(future_forecast)
    # Re-index the raw forecast on the test dates so it lines up with `test`.
    future_forecast = pd.DataFrame(future_forecast,
                                   index=test.index,
                                   columns=['Prediction'])

    future_forecast.plot(title='forecast for 2017')
    plt.show()

    test.plot(title='actual data for 2017')
    plt.show()

    # Squared error per row, then the square root of the monthly mean.
    prediction_error = pd.DataFrame(
        (future_forecast['Prediction'] - test['open'])**2, index=test.index)
    prediction_error_monthly = (prediction_error.groupby(
        pd.Grouper(freq='M')).mean())**0.5
    # Fix: the title used to say "mean square error", but the **0.5 above
    # makes this the root-mean-square error.
    prediction_error_monthly.plot(title='Monthly root mean square error')
    plt.show()
Ejemplo n.º 13
0
# Fit a VAR model on `arsi_d` and print a one-step-ahead forecast.
model = VAR(endog=arsi_d)
model_fit = model.fit()
yhat = model_fit.forecast(model_fit.y, steps=1)
print(yhat)
#%%  

#%%

from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from pandas.plotting import autocorrelation_plot


#ARIMA 
NDVI = arsi.NDVI
autocorrelation_plot(NDVI)

# Chronological 70/30 split, then walk-forward validation: refit
# ARIMA(5, 0, 2) on everything seen so far, forecast one step, and append the
# true observation to the history before the next step.
size = int(len(NDVI) * 0.7)
train, test = NDVI[0:size], NDVI[size:len(NDVI)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
	model = ARIMA(history, order=(5,0,2))
	model_fit = model.fit(disp=0)
	output = model_fit.forecast()
	# First element of the forecast() result is kept as the prediction.
	yhat = output[0]
	predictions.append(yhat)
	# NOTE(review): test[t] is label-based when `test` is a pandas Series --
	# confirm the index makes this the t-th test value.
	obs = test[t]
	history.append(obs)
	print('predicted=%f, expected=%f' % (yhat, obs))
# Mean squared error of the walk-forward predictions.
error = mean_squared_error(test, predictions)
Ejemplo n.º 14
0
# ACF and PACF of the footfall series, 12 lags each.
tsa_plots.plot_acf(Walmart.Footfalls,lags=12)
tsa_plots.plot_pacf(Walmart.Footfalls,lags=12)


# Fit two candidate ARIMA orders and look at their AIC.
# NOTE: the two bare `.aic` expressions only display a value when they are the
# last line of an interactive cell.
model1=ARIMA(Walmart.Footfalls,order=(12,1,1)).fit(disp=0)
model2=ARIMA(Walmart.Footfalls,order=(1,1,5)).fit(disp=0)
model1.aic
model2.aic

# Scan q = 0..6 with p and d fixed at 1, recording each order and its AIC.
p=1
q=0
d=1
pdq=[]
aic=[]
for q in range(7):
    model=ARIMA(Walmart.Footfalls,order=(p,d,q)).fit(disp=0)
    x = model.aic
    x1 = p,d,q

    aic.append(x)
    pdq.append(x1)

# Map each (p, d, q) order to its AIC.
# NOTE: this rebinds `d` from the differencing order to the result dict, so
# re-running the loop above afterwards would pass a dict as part of the order.
keys = pdq
values = aic
d = dict(zip(keys, values))
print (d)

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(Walmart.Footfalls)
Ejemplo n.º 15
0
# In[8]:


# Visualize the autocorrelation of Bitcoin prices in three side-by-side
# panels: lag plot, ACF over 30 lags, and pandas' autocorrelation plot.
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
fig, ax = plt.subplots(1,3, figsize = (21,6))
a0 = lag_plot(BTC, ax = ax[0])
ax[0].set_title('Lag 1 Autocorrelation Scatterplot')
a1 = plot_acf(BTC, lags = 30, ax = ax[1])
ax[1].set_title('Autocorrelation over 30 lags')
ax[1].set_ylabel('Autocorrelation')
ax[1].set_xlabel('Lag')
a2 = autocorrelation_plot(BTC, ax = ax[2])
ax[2].set_title('Autocorrelation over all lags')
plt.show()


# As we can see from the above plots, the day closing prices of Bitcoin are heavily correlated. Our model should take this into account. Now, we should check the stationarity of the series before building our model. We will do this both by plotting and running statistical tests.

# In[9]:


# Take first differences and drop the NaN rows that diff() leaves behind.
df_example_diff = df_example.diff()
df_example_diff = df_example_diff.dropna()

# Plot values: a fresh 1x3 grid of axes (rebinds `fig` and `ax`).
fig, ax = plt.subplots(1,3, figsize = (21,6))
Ejemplo n.º 16
0
    'Now check the autocorrelation.  How much informtion can be derived from the signal to predict each value.'
)
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot

# Pair every value with its predecessor and print the correlation matrix of
# the two columns.
values = DataFrame(signals.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t-1', 't+1']
result = dataframe.corr()
print(result)

from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_squared_error

autocorrelation_plot(signals)
pyplot.show()

# In[]
print('Check the default model.')

# create lagged dataset
values = DataFrame(signals.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t-1', 't+1']
# split into train and test sets
# Row 0 is skipped (its shifted value is NaN); the last 7 rows are the test set.
X = dataframe.values
train, test = X[1:len(X) - 7], X[len(X) - 7:]
# Column 0 is the lagged value (input), column 1 the current value (target).
train_X, train_y = train[:, 0], train[:, 1]
test_X, test_y = test[:, 0], test[:, 1]
# Remaining panels of a stacked figure: trend, seasonality (subplot 413) and
# residuals (subplot 414), each with its own legend.
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

from pandas.plotting import autocorrelation_plot

# Autocorrelation of the "diet" column, cast to float.
x = df["diet"].astype(float)
autocorrelation_plot(x)
plt.show()

from statsmodels.tsa.stattools import acf

# ACF of the first difference, up to 36 lags, drawn as a plain line.
x_diff = x.diff().dropna()  # first item is NA
lag_acf = acf(x_diff, nlags=36)
plt.plot(lag_acf)
plt.title('Autocorrelation Function')
plt.show()

from statsmodels.tsa.stattools import acf, pacf

# NOTE: `x` and `x_diff` are rebound here to the "gym" column.
x = df["gym"].astype(float)

x_diff = x.diff().dropna()  # first item is NA
Ejemplo n.º 18
0
        return data.loc[data.index.get_level_values(0) == userId].copy()
    except KeyError:
        print('El usuario ', userId, ' no existe.')


lista = []
df = pd.read_pickle('./pkl/dataset.pkl')
# One pass per distinct value of the first index level; each value is passed
# to get_user_data as the user id.
for i in df.index.get_level_values(0).drop_duplicates():
    # This user's 'slevel' column with the first index level dropped.
    dfu = get_user_data(df, i).droplevel(0).loc[:, 'slevel']
    # Align onto a fixed hourly grid; grid hours with no matching label in
    # `dfu` become NaN -- assumes `dfu` is indexed by timestamp, TODO confirm.
    idx = pd.date_range('2013-03-27 04:00:00', '2013-06-01 3:00:00', freq='h')
    d = pd.DataFrame(index=idx)
    d['slevel'] = dfu
    # Per-column count of missing hours for this user, collected in `lista`.
    a = d.isna().sum()
    lista.append(a)
    nulls = d.isna()
    plt.close()
    # Scatter of the missing-value mask against the position on the grid.
    plt.scatter(list(range(len(idx))), nulls, 0.1, marker='x')
    plt.show()

    # Forward-fill the gaps before computing the autocorrelation.
    d.ffill(inplace=True)

    # Autocorrelation Plot
    plt.close()
    plt.rcParams.update({'figure.figsize': (9, 5), 'figure.dpi': 120})
    autocorrelation_plot(d.slevel)
    # Title shows the user id and that user's missing-value count.
    plt.title('{0},{1}'.format(str(i), a))
    plt.show()

    plot_by_week(i)

# NOTE: `a` still holds the value from the final iteration, so this prints
# the missing count of the last user only (all counts are in `lista`).
print(a)
Ejemplo n.º 19
0
# integer encode direction
# Fix: write the encoded columns back into `values`. The original list
# comprehension computed the encodings for columns 4-6 and discarded them,
# so the encoding was never applied.
encoder = LabelEncoder()
for categor_variable in range(4, 7):
    values[:, categor_variable] = encoder.fit_transform(
        values[:, categor_variable])
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[7,8,9,11,12,13]], axis=1, inplace=True)
print(reframed.head())

# Autocorrelation of the 'energy' column, zoomed to the first 24 lags.
autocorrelation_plot(dataset['energy'].dropna())
pyplot.xlim(0,24)
pyplot.title('Auto-correlation of hourly demand over a 24 hour period')
pyplot.show()

# Name the eight remaining columns; the last one ('energy') is the target.
reframed.columns = ['windSpeed', 'humidity' ,'temperature' ,'energy_1', 'hour' ,'dayofweek', 'month', 'energy']
reframed.head()

# Features are the first seven columns, the target is the eighth.
X = reframed.values[:,0:7]
Y = reframed.values[:,7]
# Time-ordered cross-validation with 3 splits.
tscv = TimeSeriesSplit(n_splits=3)
print(tscv)


for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
Ejemplo n.º 20
0
# Series decomposition: first difference ("Aumento") and second difference
# ("Aceleracao") of the quantity.
Quantidade['Aumento'] = Quantidade['Quantity'].diff()
GraficoLinhas(Quantidade["Aumento"], "Série", "Aumento", "Tempo")

Quantidade['Aceleracao'] = Quantidade['Aumento'].diff()
GraficoLinhas(Quantidade["Aceleracao"], "Série", "Aceleracao", "Tempo")

# Plot the three series together.
Componentes(
    [Quantidade["Quantity"], Quantidade["Aumento"], Quantidade["Aceleracao"]],
    ['Serie', 'Aumento', 'Aceleracao'], 'Analise da serie temporal')

#------------------------------------------------------------------------------
# Autocorrelation analysis: one 16x12 figure per series.
# NOTE: `ax` holds the Figure returned by plt.figure(); the name is kept in
# case later code refers to it. The `ax = ax` no-op lines were removed.
ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Quantity"])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
plt.ylabel('Autocorrelacao', fontsize=16)
plt.xlabel('Tempo')

# [1:] skips the single NaN that diff() leaves at the start of "Aumento".
ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Aumento"][1:])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
plt.ylabel('Autocorrelacao', fontsize=16)
plt.xlabel('Tempo')

# Fix: the third figure plotted "Aumento" a second time. The [2:] slice
# matches the two leading NaNs of the second difference, so "Aceleracao" is
# the intended series.
ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Aceleracao"][2:])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
Ejemplo n.º 21
0
def drawAutoCorr(df, col):
    """Plot the autocorrelation of ``df[col]`` and show the figure.

    Args:
        df: object indexed with ``col`` to obtain the data to plot.
        col: key of the column to plot.
    """
    column = df[col]
    autocorrelation_plot(column)
    plt.show()
               index=albany['Year'].to_list())
#%%
from matplotlib import pyplot
from pandas.plotting import lag_plot
# Scatter of each value of `ab` against its lagged value.
lag_plot(ab)
pyplot.show()
#%%
# Correlation between each value and the one before it: pair the series with
# a copy shifted by one row and print the correlation matrix.
values = pd.DataFrame(ab.values)
dataframe = pd.concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']
result = dataframe.corr()
print(result)
#%%
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(ab)
pyplot.show()
#%%
from statsmodels.graphics.tsaplots import plot_acf
# ACF restricted to the first 31 lags.
plot_acf(ab, lags=31)
pyplot.show()
#%%
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from math import sqrt
# Rebuild the same lagged two-column frame with the directly imported names.
values = DataFrame(ab.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']
Ejemplo n.º 23
0
# Figure 8.2
# 3x2 grid: left column shows a series, right column its autocorrelation.
f, axarr = plot.subplots(3, 2)
f.subplots_adjust(hspace=0.5)
f.subplots_adjust(wspace=0.5)
random.seed(0)
# Fix: the samples below come from NumPy's generator, so seed it explicitly.
# random.seed(0) alone only makes the figure reproducible if `random`
# happens to be numpy.random.
np.random.seed(0)
# 5200 draws from a normal distribution with mean 0.1 and std 2.
random_time_series = pd.DataFrame(np.random.normal(0.1, 2, 5200),
                                  index=range(0, 5200),
                                  columns=['value'])
axarr[0, 0].plot(random_time_series.index, random_time_series['value'])
axarr[0, 0].set_xlim([101, 500])
axarr[0, 0].set_ylim([-10, 10])
axarr[0, 0].set_xlabel('time')
axarr[0, 0].set_ylabel('value')

# Autocorrelation of the raw noise, zoomed to the first 30 lags.
autocorrelation_plot(random_time_series['value'], ax=axarr[0, 1])
axarr[0, 1].set_xlim([0, 30])
axarr[0, 1].set_ylim([-1.1, 1.1])

# rolling_window_data = pd.rolling_mean(random_time_series['value'], 10)
# 10-point rolling mean of the same series.
rolling_window_data = pd.Series(
    random_time_series['value']).rolling(window=10).mean()
axarr[1, 0].plot(random_time_series.index, rolling_window_data)
axarr[1, 0].set_xlim([101, 500])
axarr[1, 0].set_ylim([-10, 10])
axarr[1, 0].set_xlabel('time')
axarr[1, 0].set_ylabel('value')

# [10:] skips the leading entries of the rolling mean, which are NaN until
# the 10-point window is full.
autocorrelation_plot(rolling_window_data[10:], ax=axarr[1, 1])
axarr[1, 1].set_xlim([0, 30])
axarr[1, 1].set_ylim([-1.1, 1.1])
Ejemplo n.º 24
0
# Drop the NaN entries of the differenced data.
# (Exercise: why is there a NaN, and what would happen to the code below if
# it were not dropped?)
diff_data = diff_data.dropna()

# Plot the original data, then the differenced data in a new figure.
plt.plot(data)
plt.figure()
plt.plot(diff_data)

# Plot the ACF for the data. This call opens a new plot
smt.graphics.tsa.plot_acf(data, lags=30, alpha = 0.05)
# lags = 30 means drawing 30 lags
# Here alpha=.05, 95% confidence intervals are returned 
# where the standard deviation is computed according to 
# Bartlett’s formula.

# You may change 0.05 to other values for alpha to see what will happen

# Plot the PACF for the data (disabled). This call opens a new plot
# smt.graphics.tsa.plot_pacf(data, lags=30, alpha=0.05)

# For differenced time series
smt.graphics.tsa.plot_acf(diff_data, lags=30, alpha = 0.05)
#plt.figure()
#smt.graphics.tsa.plot_pacf(diff_data, lags=30, alpha = 0.05)

# pandas offers a plot for the ACF, but no plot function for the PACF.
plt.figure()  # We need prepare the figure
autocorrelation_plot(data)

 
Ejemplo n.º 25
0
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import sts
import pandas as pd  
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import autocorrelation_plot

tf.set_random_seed(666)
# Read only column 1 of the CSV, ignoring the last 3 lines of the file.
dataframe = pd.read_csv('international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values
dataset = np.array(dataset.astype('float32'))


autocorrelation_plot(dataset)

# Autocorrelation of the single data column at every lag 0..len(dataset)-1.
corr=[]
for i in range(0,len(dataset)):
    #print(i,pd.Series(dataset.T[0]).autocorr(lag=i))
    corr.append(pd.Series(dataset.T[0]).autocorr(lag=i))

# Lag with the largest autocorrelation, ignoring lag 0 and the last two lags;
# the +1 undoes the offset introduced by the [1:-2] slice.
janela=(np.where(corr[1:-2]==np.max(corr[1:-2]))[0]+1)[0]
    
# Hold out the final 12 observations.
X0=dataset[0:-12]
Y0=dataset[-12:]

def build_model(observed_time_series):
  trend = sts.LocalLinearTrend(observed_time_series=observed_time_series)
  seasonal = tfp.sts.Seasonal(
      num_seasons=int(len(dataset)/janela), observed_time_series=observed_time_series)
Ejemplo n.º 26
0
 def plot_autocorrelation(self):
     """Draw the autocorrelation plot of ``self.series`` and display it
     with ``plt.show()``. Returns nothing."""
     data = self.series
     autocorrelation_plot(data)
     plt.show()
Ejemplo n.º 27
0
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA

def parser(x):
	"""Turn a 'Y-MM' stamp into a datetime in the 1900s decade.

	'190' is prepended to ``x`` before parsing with '%Y-%m', so '1-01'
	becomes January 1901.

	Args:
		x: string of the form '<year digit>-<month>'.

	Returns:
		datetime.datetime for the first day of that month.

	Raises:
		ValueError: if '190' + x does not match '%Y-%m'.
	"""
	# Local stdlib import: `from pandas import datetime` was deprecated in
	# pandas 1.0 and removed in 2.0, so this function must not depend on it.
	from datetime import datetime
	return datetime.strptime('190'+x, '%Y-%m')

# Load the series: column 0 is parsed with `parser` and used as the index;
# squeeze=True is passed to read_csv.
series = read_csv('sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
#print(series.head())
#series.plot()
autocorrelation_plot(series)
pyplot.show()

# The ARIMA fit and residual diagnostics below are disabled: they sit inside
# a bare string literal, which is evaluated and discarded.
"""# fit model
model = ARIMA(series, order=(5,1,0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe()) """
Ejemplo n.º 28
0
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot

def parser(x):
	"""Turn a 'Y-MM' stamp into a datetime in the 1900s decade.

	'190' is prepended to ``x`` before parsing with '%Y-%m', so '1-01'
	becomes January 1901.

	Args:
		x: string of the form '<year digit>-<month>'.

	Returns:
		datetime.datetime for the first day of that month.

	Raises:
		ValueError: if '190' + x does not match '%Y-%m'.
	"""
	# Local stdlib import: `from pandas import datetime` was deprecated in
	# pandas 1.0 and removed in 2.0, so this function must not depend on it.
	from datetime import datetime
	return datetime.strptime('190'+x, '%Y-%m')

# Load the series: column 0 is parsed with `parser` and used as the index;
# squeeze=True is passed to read_csv.
series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# Autocorrelation plot of the loaded series.
autocorrelation_plot(series)
pyplot.show()
Ejemplo n.º 29
0
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import mysql.connector as sql
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
from pandas import DataFrame

# Open the MySQL connection to the market-data database.
# NOTE(review): host and credentials are hardcoded here (the credentials are
# masked in this listing); consider reading them from the environment.
db_connection = sql.connect(host='18.206.99.175',
                            database='market_data',
                            user='******',
                            password='******')

# Prices of contract WINQ19 on 2019-07-30, indexed by timestamp.
# Fix: the query text ended with an unmatched ')' after date("2019-07-30"),
# which makes the statement invalid SQL.
dados = pd.read_sql(
    'SELECT datetime_buss, price FROM mining_trade where active = "WINQ19" and date(datetime_buss) = date("2019-07-30")',
    con=db_connection,
    index_col='datetime_buss')

autocorrelation_plot(dados)

# fit model
# NOTE(review): order=(3600, 1, 21) asks for 3600 autoregressive terms --
# confirm this is intended; it is left unchanged here.
model = ARIMA(dados, order=(3600, 1, 21))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
plt.show()
# Kernel-density estimate of the residuals, then their summary statistics.
residuals.plot(kind='kde')
plt.show()
print(residuals.describe())
Ejemplo n.º 30
0
def plot(input_ts='-',
         columns=None,
         start_date=None,
         end_date=None,
         clean=False,
         skiprows=None,
         index_type='datetime',
         names=None,
         ofilename='plot.png',
         type='time',
         xtitle='',
         ytitle='',
         title='',
         figsize='10,6.0',
         legend=None,
         legend_names=None,
         subplots=False,
         sharex=True,
         sharey=False,
         colors='auto',
         linestyles='auto',
         markerstyles=' ',
         style='auto',
         logx=False,
         logy=False,
         xaxis='arithmetic',
         yaxis='arithmetic',
         xlim=None,
         ylim=None,
         secondary_y=False,
         mark_right=True,
         scatter_matrix_diagonal='kde',
         bootstrap_size=50,
         bootstrap_samples=500,
         norm_xaxis=False,
         norm_yaxis=False,
         lognorm_xaxis=False,
         lognorm_yaxis=False,
         xy_match_line='',
         grid=False,
         label_rotation=None,
         label_skip=1,
         force_freq=None,
         drawstyle='default',
         por=False,
         invert_xaxis=False,
         invert_yaxis=False,
         round_index=None,
         plotting_position='weibull',
         source_units=None,
         target_units=None,
         lag_plot_lag=1):
    r"""Plot data."""
    # Need to work around some old option defaults with the implementation of
    # mando
    legend = bool(legend == '' or legend == 'True' or legend is None)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FixedLocator

    tsd = tsutils.common_kwds(tsutils.read_iso_ts(input_ts,
                                                  skiprows=skiprows,
                                                  names=names,
                                                  index_type=index_type),
                              start_date=start_date,
                              end_date=end_date,
                              pick=columns,
                              round_index=round_index,
                              dropna='all',
                              source_units=source_units,
                              target_units=target_units,
                              clean=clean)

    if type in ['bootstrap',
                'heatmap',
                'autocorrelation',
                'lag_plot']:
        if len(tsd.columns) != 1:
            raise ValueError("""
*
*   The '{1}' plot can only work with 1 time-series in the DataFrame.
*   The DataFrame that you supplied has {0} time-series.
*
""".format(len(tsd.columns), type))

    if por is True:
        tsd = tsutils.common_kwds(tsutils.read_iso_ts(tsd),
                                  start_date=start_date,
                                  end_date=end_date,
                                  round_index=round_index,
                                  dropna='no')

    # This is to help pretty print the frequency
    try:
        try:
            pltfreq = str(tsd.index.freq, 'utf-8').lower()
        except TypeError:
            pltfreq = str(tsd.index.freq).lower()
        if pltfreq.split(' ')[0][1:] == '1':
            beginstr = 3
        else:
            beginstr = 1
        if pltfreq == 'none':
            short_freq = ''
        else:
            # short freq string (day) OR (2 day)
            short_freq = '({0})'.format(pltfreq[beginstr:-1])
    except AttributeError:
        short_freq = ''

    if legend_names:
        lnames = tsutils.make_list(legend_names)
        if len(lnames) != len(set(lnames)):
            raise ValueError("""
*
*   Each name in legend_names must be unique.
*
""")
        if len(tsd.columns) == len(lnames):
            renamedict = dict(list(zip(tsd.columns, lnames)))
        elif type == 'xy' and len(tsd.columns) // 2 == len(lnames):
            renamedict = dict(list(zip(tsd.columns[2::2], lnames[1:])))
            renamedict[tsd.columns[1]] = lnames[0]
        else:
            raise ValueError("""
*
*   For 'legend_names' you must have the same number of comma
*   separated names as columns in the input data.  The input
*   data has {0} where the number of 'legend_names' is {1}.
*
*   If 'xy' type you need to have legend names as x,y1,y2,y3,...
*
""".format(len(tsd.columns), len(lnames)))
        tsd.rename(columns=renamedict, inplace=True)
    else:
        lnames = tsd.columns

    if colors == 'auto':
        colors = color_list
    else:
        colors = tsutils.make_list(colors)

    if linestyles == 'auto':
        linestyles = line_list
    else:
        linestyles = tsutils.make_list(linestyles)

    if markerstyles == 'auto':
        markerstyles = marker_list
    else:
        markerstyles = tsutils.make_list(markerstyles)
        if markerstyles is None:
            markerstyles = ' '

    if style != 'auto':

        nstyle = tsutils.make_list(style)
        if len(nstyle) != len(tsd.columns):
            raise ValueError("""
*
*   You have to have the same number of style strings as time-series to plot.
*   You supplied '{0}' for style which has {1} style strings,
*   but you have {2} time-series.
*
""".format(style, len(nstyle), len(tsd.columns)))
        colors = []
        markerstyles = []
        linestyles = []
        for st in nstyle:
            colors.append(st[0])
            if len(st) == 1:
                markerstyles.append(' ')
                linestyles.append('-')
                continue
            if st[1] in marker_list:
                markerstyles.append(st[1])
                try:
                    linestyles.append(st[2:])
                except IndexError:
                    linestyles.append(' ')
            else:
                markerstyles.append(' ')
                linestyles.append(st[1:])
    if linestyles is None:
        linestyles = [' ']
    else:
        linestyles = [' ' if i == '  ' else i for i in linestyles]
    markerstyles = [' ' if i is None else i for i in markerstyles]

    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    style = ['{0}{1}{2}'.format(next(icolors),
                                next(imarkerstyles),
                                next(ilinestyles))
             for i in list(range(len(tsd.columns)))]

    # reset to beginning of iterator
    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    if (logx is True or
            logy is True or
            norm_xaxis is True or
            norm_yaxis is True or
            lognorm_xaxis is True or
            lognorm_yaxis is True):
        warnings.warn("""
*
*   The --logx, --logy, --norm_xaxis, --norm_yaxis, --lognorm_xaxis, and
*   --lognorm_yaxis options are deprecated.
*
*   For --logx use --xaxis="log"
*   For --logy use --yaxis="log"
*   For --norm_xaxis use --type="norm_xaxis"
*   For --norm_yaxis use --type="norm_yaxis"
*   For --lognorm_xaxis use --type="lognorm_xaxis"
*   For --lognorm_yaxis use --type="lognorm_yaxis"
*
""")

    if xaxis == 'log':
        logx = True
    if yaxis == 'log':
        logy = True

    if type in ['norm_xaxis',
                'lognorm_xaxis',
                'weibull_xaxis']:
        xaxis = 'normal'
        if logx is True:
            logx = False
            warnings.warn("""
*
*   The --type={1} cannot also have the xaxis set to {0}.
*   The {0} setting for xaxis is ignored.
*
""".format(xaxis, type))

    if type in ['norm_yaxis',
                'lognorm_yaxis',
                'weibull_yaxis']:
        yaxis = 'normal'
        if logy is True:
            logy = False
            warnings.warn("""
*
*   The --type={1} cannot also have the yaxis set to {0}.
*   The {0} setting for yaxis is ignored.
*
""".format(yaxis, type))

    xlim = _know_your_limits(xlim, axis=xaxis)
    ylim = _know_your_limits(ylim, axis=yaxis)

    figsize = tsutils.make_list(figsize)

    if not isinstance(tsd.index, pd.DatetimeIndex):
        tsd.insert(0, tsd.index.name, tsd.index)

    if type in ['xy',
                'double_mass']:
        if tsd.shape[1] % 2 != 0:
            raise AttributeError("""
*
*   The 'xy' and 'double_mass' types must have an even number of columns
*   arranged as x,y pairs.  You supplied {0} columns.
*
""".format(tsd.shape[1]))
        colcnt = tsd.shape[1] // 2
    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        colcnt = tsd.shape[1]

    if type in ['xy',
                'double_mass',
                'norm_xaxis',
                'norm_yaxis',
                'lognorm_xaxis',
                'lognorm_yaxis',
                'weibull_xaxis',
                'weibull_yaxis',
                'heatmap']:
        _, ax = plt.subplots(figsize=figsize)
        plotdict = {(False, True): ax.semilogy,
                    (True, False): ax.semilogx,
                    (True, True): ax.loglog,
                    (False, False): ax.plot}

    if type == 'time':
        ax = tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                      sharey=sharey, style=None, logx=logx, logy=logy,
                      xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      mark_right=mark_right, figsize=figsize,
                      drawstyle=drawstyle)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        if legend is True:
            plt.legend(loc='best')
    elif type in ['taylor']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import taylor_diagram
        ref = tsd.iloc[:, 0]
        std = [pd.np.std(ref)]
        ccoef = [1.0]
        crmsd = [0.0]
        for col in range(1, len(tsd.columns)):
            std.append(pd.np.std(tsd.iloc[:, col]))
            ccoef.append(pd.np.corrcoef(tsd.iloc[:, col],
                                        ref)[0][1])
            crmsd.append(centered_rms_dev(tsd.iloc[:, col].values,
                                          ref.values))
        taylor_diagram(pd.np.array(std),
                       pd.np.array(crmsd),
                       pd.np.array(ccoef))
    elif type in ['target']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import rmsd
        from .. skill_metrics import bias
        from .. skill_metrics import target_diagram
        biases = []
        rmsds = []
        crmsds = []
        ref = tsd.iloc[:, 0].values
        for col in range(1, len(tsd.columns)):
            biases.append(bias(tsd.iloc[:, col].values, ref))
            crmsds.append(centered_rms_dev(tsd.iloc[:, col].values,
                                           ref))
            rmsds.append(rmsd(tsd.iloc[:, col].values,
                              ref))
        target_diagram(pd.np.array(biases),
                       pd.np.array(crmsds),
                       pd.np.array(rmsds))
    elif type in ['xy',
                  'double_mass']:
        # PANDAS was not doing the right thing with xy plots
        # if you wanted lines between markers.
        # Fell back to using raw matplotlib.
        # Boy I do not like matplotlib.

        for colindex in range(colcnt):
            ndf = tsd.iloc[:, colindex*2:colindex*2 + 2]
            if type == 'double_mass':
                ndf = ndf.dropna().cumsum()
            oxdata = pd.np.array(ndf.iloc[:, 0])
            oydata = pd.np.array(ndf.iloc[:, 1])

            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)

        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        if legend is True:
            ax.legend(loc='best')

        if type == 'double_mass':
            xtitle = xtitle or 'Cumulative {0}'.format(tsd.columns[0])
            ytitle = ytitle or 'Cumulative {0}'.format(tsd.columns[1])

    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        ppf = tsutils.set_ppf(type.split('_')[0])
        ys = tsd.iloc[:, :]

        for colindex in range(colcnt):
            oydata = pd.np.array(ys.iloc[:, colindex].dropna())
            oydata = pd.np.sort(oydata)[::-1]
            n = len(oydata)
            norm_axis = ax.xaxis
            oxdata = ppf(tsutils.set_plotting_position(n,
                                                       plotting_position))

            if type in ['norm_yaxis',
                        'lognorm_yaxis',
                        'weibull_yaxis']:
                oxdata, oydata = oydata, oxdata
                norm_axis = ax.yaxis

            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)

        # Make it pretty
        xtmaj = pd.np.array([0.01, 0.1, 0.5, 0.9, 0.99])
        xtmaj_str = ['1', '10', '50', '90', '99']
        xtmin = pd.np.concatenate([pd.np.linspace(0.001, 0.01, 10),
                                   pd.np.linspace(0.01, 0.1, 10),
                                   pd.np.linspace(0.1, 0.9, 9),
                                   pd.np.linspace(0.9, 0.99, 10),
                                   pd.np.linspace(0.99, 0.999, 10)])
        xtmaj = ppf(xtmaj)
        xtmin = ppf(xtmin)

        norm_axis.set_major_locator(FixedLocator(xtmaj))
        norm_axis.set_minor_locator(FixedLocator(xtmin))

        if type in ['norm_xaxis',
                    'lognorm_xaxis',
                    'weibull_xaxis']:
            ax.set_xticklabels(xtmaj_str)
            ax.set_ylim(ylim)
            ax.set_xlim(ppf(xlim))

        elif type in ['norm_yaxis',
                      'lognorm_yaxis',
                      'weibull_yaxis']:
            ax.set_yticklabels(xtmaj_str)
            ax.set_xlim(xlim)
            ax.set_ylim(ppf(ylim))

        if type in ['norm_xaxis',
                    'norm_yaxis']:
            xtitle = xtitle or 'Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['lognorm_xaxis',
                      'lognorm_yaxis']:
            xtitle = xtitle or 'Log Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['weibull_xaxis',
                      'weibull_yaxis']:
            xtitle = xtitle or 'Weibull Distribution'
            ytitle = ytitle or tsd.columns[0]

        if type in ['norm_yaxis',
                    'lognorm_yaxis',
                    'weibull_yaxis']:
            xtitle, ytitle = ytitle, xtitle

        if legend is True:
            ax.legend(loc='best')

    elif type in ['kde',
                  'probability_density']:
        ax = tsd.plot(kind='kde', legend=legend, subplots=subplots,
                      sharex=sharex, sharey=sharey, style=None, logx=logx,
                      logy=logy, xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        ytitle = ytitle or 'Density'
        if legend is True:
            plt.legend(loc='best')
    elif type == 'kde_time':
        from scipy.stats.kde import gaussian_kde
        _, (ax0, ax1) = plt.subplots(nrows=1,
                                     ncols=2,
                                     sharey=True,
                                     figsize=figsize,
                                     gridspec_kw={'width_ratios': [1, 4]})
        tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                 sharey=sharey, style=None, logx=logx, logy=logy, xlim=xlim,
                 ylim=ylim, secondary_y=secondary_y, mark_right=mark_right,
                 figsize=figsize, drawstyle=drawstyle, ax=ax1)
        for index, line in enumerate(ax1.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        ylimits = ax1.get_ylim()
        ny = pd.np.linspace(ylimits[0], ylimits[1], 1000)
        for col in range(len(tsd.columns)):
            xvals = tsd.iloc[:, col].dropna().values
            pdf = gaussian_kde(xvals)
            ax0.plot(pdf(ny),
                     ny,
                     linestyle=style[col][2:],
                     color=style[col][0],
                     marker=style[col][1],
                     label=tsd.columns[col],
                     drawstyle=drawstyle)
        ax0.set(xlabel='Probability Density', ylabel=ytitle)
    elif type == 'boxplot':
        tsd.boxplot(figsize=figsize)
    elif type == 'scatter_matrix':
        from pandas.plotting import scatter_matrix
        if scatter_matrix_diagonal == 'probablity_density':
            scatter_matrix_diagonal = 'kde'
        scatter_matrix(tsd,
                       diagonal=scatter_matrix_diagonal,
                       figsize=figsize)
    elif type == 'lag_plot':
        from pandas.plotting import lag_plot
        lag_plot(tsd,
                 lag=lag_plot_lag)
        xtitle = xtitle or 'y(t)'
        ytitle = ytitle or 'y(t+{0})'.format(short_freq or 1)
    elif type == 'autocorrelation':
        from pandas.plotting import autocorrelation_plot
        autocorrelation_plot(tsd)
        xtitle = xtitle or 'Time Lag {0}'.format(short_freq)
    elif type == 'bootstrap':
        from pandas.plotting import bootstrap_plot
        bootstrap_plot(tsd,
                       size=bootstrap_size,
                       samples=bootstrap_samples,
                       color='gray')
    elif type == 'heatmap':
        # Find beginning and end years
        byear = tsd.index[0].year
        eyear = tsd.index[-1].year
        tsd = tsutils.asbestfreq(tsd)
        if tsd.index.freqstr != 'D':
            raise ValueError("""
*
*  The "heatmap" plot type can only work with daily time series.
*
""")
        dr = pd.date_range('{0}-01-01'.format(byear),
                           '{0}-12-31'.format(eyear),
                           freq='D')
        ntsd = tsd.reindex(index=dr)
        groups = ntsd.iloc[:, 0].groupby(pd.TimeGrouper('A'))
        years = pd.DataFrame()
        for name, group in groups:
            ngroup = group.values
            if len(group.values) == 365:
                ngroup = pd.np.append(group.values, [pd.np.nan])
            years[name.year] = ngroup
        years = years.T
        plt.imshow(years,
                   interpolation=None,
                   aspect='auto')
        plt.colorbar()
        yticks = list(range(byear, eyear + 1))
        skip = len(yticks)//20 + 1
        plt.yticks(range(0, len(yticks), skip), yticks[::skip])
        mnths = [0, 30, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
        mnths_labels = ['Jan',
                        'Feb',
                        'Mar',
                        'Apr',
                        'May',
                        'Jun',
                        'Jul',
                        'Aug',
                        'Sep',
                        'Oct',
                        'Nov',
                        'Dec']
        plt.xticks(mnths, mnths_labels)
        grid = False
    elif (type == 'bar' or
          type == 'bar_stacked' or
          type == 'barh' or
          type == 'barh_stacked'):
        stacked = False
        if type[-7:] == 'stacked':
            stacked = True
        kind = 'bar'
        if type[:4] == 'barh':
            kind = 'barh'
        ax = tsd.plot(kind=kind, legend=legend, stacked=stacked,
                      style=style, logx=logx, logy=logy, xlim=xlim,
                      ylim=ylim, figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        freq = tsutils.asbestfreq(tsd, force_freq=force_freq).index.freqstr
        if freq is not None:
            if 'A' in freq:
                endchar = 4
            elif 'M' in freq:
                endchar = 7
            elif 'D' in freq:
                endchar = 10
            elif 'H' in freq:
                endchar = 13
            else:
                endchar = None
            nticklabels = []
            if kind == 'bar':
                taxis = ax.xaxis
            else:
                taxis = ax.yaxis
            for index, i in enumerate(taxis.get_majorticklabels()):
                if index % label_skip:
                    nticklabels.append(' ')
                else:
                    nticklabels.append(i.get_text()[:endchar])
            taxis.set_ticklabels(nticklabels)
            plt.setp(taxis.get_majorticklabels(), rotation=label_rotation)
        if legend is True:
            plt.legend(loc='best')
    elif type == 'histogram':
        tsd.hist(figsize=figsize)
    else:
        raise ValueError("""
*
*   Plot 'type' {0} is not supported.
*
""".format(type))

    if xy_match_line:
        if isinstance(xy_match_line, str):
            xymsty = xy_match_line
        else:
            xymsty = 'g--'
        nxlim = ax.get_xlim()
        nylim = ax.get_ylim()
        maxt = max(nxlim[1], nylim[1])
        mint = min(nxlim[0], nylim[0])
        ax.plot([mint, maxt], [mint, maxt], xymsty, zorder=1)
        ax.set_ylim(nylim)
        ax.set_xlim(nxlim)

    plt.xlabel(xtitle)
    plt.ylabel(ytitle)

    if invert_xaxis is True:
        plt.gca().invert_xaxis()
    if invert_yaxis is True:
        plt.gca().invert_yaxis()

    plt.grid(grid)

    plt.title(title)
    plt.tight_layout()
    if ofilename is None:
        return plt
    plt.savefig(ofilename)
Ejemplo n.º 31
0
    plt.plot([x for x in range(len(truth))], truth, label='truth')
    axes = plt.gca()
    axes.set_ylim([miny, maxy])
    plt.legend()
    if name is not None:
        plt.savefig(name)
    else:
        plt.show()
    plt.close()


def plot_relation_power_output(df, attr):
    """Show a box plot of the average power output grouped by ``attr``.

    Calls ``df.boxplot`` on the "Power average [kW]" column, grouped by the
    ``attr`` column and with outlier markers hidden (``showfliers=False``),
    then displays the figure and closes it.

    Parameters
    ----------
    df
        Object exposing ``boxplot(by=..., column=..., showfliers=...)``;
        presumably a pandas DataFrame -- TODO confirm against callers.
    attr
        Grouping key, forwarded to ``boxplot`` as ``by``.

    Returns
    -------
    None
    """
    df.boxplot(by=attr, column="Power average [kW]", showfliers=False)
    # The previous version also fetched ``plt.gca()`` into a local that was
    # never used; it has been dropped.
    plt.show()
    plt.close()


#df["Wind average [m/s]"][0:100].plot()
#plt.show()
if __name__ == "__main__":
    # The input file path is the single command-line argument.  Exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {0} <input-file>".format(sys.argv[0]))

    df = read_file(sys.argv[1])
    df = arrange_data(df)

    # Autocorrelation of the first 100 entries of the wind-speed column.
    autocorrelation_plot(df["Wind average [m/s]"][0:100])
    plt.show()
    #plot_relation_power_output(df, "hour")

#print(df["Power average [kW]"].mean())
#print(df["Wind average [m/s]"].mean())
Ejemplo n.º 32
0
def prot02(series):
    """Render the autocorrelation plot for ``series`` and display it.

    ``series`` is handed unchanged to ``autocorrelation_plot``; the resulting
    figure is then shown with ``pyplot.show()``.  Nothing is returned.
    """
    autocorrelation_plot(series)
    pyplot.show()
Ejemplo n.º 33
0
plt.subplot(2, 1, 2)  # second panel of a 2-row, 1-column grid
plt.plot(dataSet2)
plt.title('dataSet2')
plt.text(2, 0.8, 'dataSet2')
# The string literal below is disabled code: a lag plot for spotting
# autocorrelation by eye.  It is never executed.
""" LagPlot zur optischen Identifizierung der Autokorrelation
from pandas.tools.plotting import lag_plot
lag_plot(dataSet1) 
lag_plot(dataSet2) 
"""

# (Placeholder from the original: autocorrelation library import and output.)

# Plot the autocorrelation functions: one numbered pyplot figure per data
# set, each one shown before the next figure is created.
for figure_id, samples in ((2, dataSet1), (3, dataSet2)):
    plt.figure(figure_id)
    autocorrelation_plot(samples)
    plt.show()

# (Placeholder from the original: import of the ARIMA function.)

# Fit the ARIMA model; per the original note the order was taken from an example.
# NOTE(review): the description below talks about ARIMA(5,1,0) with a lag of 5,
# but the call uses order=(10, 1, 0) -- confirm which lag order is intended.
"""First, we fit an ARIMA(5,1,0) model. 
This sets the lag value to 5 for autoregression, 
uses a difference order of 1 to make the time series stationary, 
and uses a moving average model of 0"""

model = ARIMA(dataSet1, order=(10, 1, 0))
Ejemplo n.º 34
0
import matplotlib.pyplot as plt
import helper_functions as hf
from pandas.plotting import autocorrelation_plot

# Optional Quandl data source (disabled).  The API key that was hard-coded in
# this comment has been removed: credentials must not live in source files.
# Read the key from the environment instead, and rotate the exposed key.
#import os
#import quandl
#quandl.ApiConfig.api_key = os.environ['QUANDL_API_KEY']
#mydata = quandl.get('FSE/EON_X')

# Load the data set through the project helper; the code below reads its
# ``Price`` column.  A stray, incomplete ``df.`` statement that made this
# script a syntax error has been removed.
df = hf.GetIndiaNifty500HistData()
df.info()
print(df)

# Two stacked axes: the price series on top, its autocorrelation below.
fig, ax = plt.subplots(nrows=2)

#x=df.Date, 
df.Price.plot(ax=ax[0])
#plt.plot(df.Date, df.Price)
autocorrelation_plot(df.Price, ax=ax[1])
plt.show()

    'gyr_phone_y_freq_0.8_Hz_ws_40', 'acc_phone_y_freq_1.6_Hz_ws_40'
]
# The five feature lists gathered into a single list.
possible_feature_sets = [
    basic_features,
    features_after_chapter_3,
    features_after_chapter_4,
    features_after_chapter_5,
    selected_features,
]
# Display labels for the feature lists.
# NOTE(review): presumably index-aligned with possible_feature_sets (both hold
# five entries) -- confirm at the place where the two lists are consumed.
feature_names = [
    'initial set',
    'Chapter 3',
    'Chapter 4',
    'Chapter 5',
    'Selected features',
]

# Let us first study whether the time series is stationary and what the autocorrelations are.

# NOTE(review): `adfuller` is presumably statsmodels' augmented Dickey-Fuller
# test -- confirm against the imports.  Its result is stored in `dftest` but
# not used in the lines visible here.
dftest = adfuller(dataset['gyr_phone_z'], autolag='AIC')

# Use plt.figure() (lower case) so that a fresh figure becomes pyplot's
# current figure.  The previous plt.Figure() only constructed a Figure object
# that pyplot did not manage and that was discarded immediately, so the plot
# below could land on whatever figure happened to be current.
plt.figure()
autocorrelation_plot(dataset['gyr_phone_z'])
DataViz.save(plt)
plt.show()

# Now let us focus on the learning part.

learner = TemporalRegressionAlgorithms()
# NOTE(review): this rebinds the module-level name `eval`, hiding Python's
# built-in eval() for the rest of the file.  Not renamed here because code
# outside this section may refer to it -- consider a name such as `evaluator`.
eval = RegressionEvaluation()

# We repeat the experiment a number of times to get more robust results, since
# the initialization of e.g. the NN is random.

repeats = 10

# We set a washout time to give the NNs time to stabilize. The error is not
# computed during the washout time.

washout_time = 10