def autocorrelation_graph(X):
    """
    Plot the autocorrelation graph of a pandas series

    Pre-condition: X - a pandas Series
    """
    plt.figure(figsize=(10, 5))
    autocorrelation_plot(X)
    plt.show()
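# Minimal usage sketch (not in the original): assumes matplotlib.pyplot as plt
# and pandas.plotting.autocorrelation_plot are imported at module level as the
# function above expects. The noisy sine series is a hypothetical example with
# a clear period of 50 steps, so peaks should appear at lags near 50.
import numpy as np
import pandas as pd

t = np.arange(500)
s = pd.Series(np.sin(2 * np.pi * t / 50) + np.random.normal(0, 0.3, size=500))
autocorrelation_graph(s)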
def plot_autocorrelation(self, ax=None):
    """Plots autocorrelation of power data

    Reference:
    http://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm

    Returns
    -------
    matplotlib.axes.Axes
    """
    if ax is None:
        ax = plt.gca()
    for power in self.power_series():
        autocorrelation_plot(power, ax=ax)
    return ax
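# Usage sketch (not in the original): since the method returns the axis it
# drew on, it composes into a figure you control. `meter` is a hypothetical
# instance of the class this method belongs to.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 4))
meter.plot_autocorrelation(ax=ax)
ax.set_title('Autocorrelation of power data')
plt.show()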
def test_autocorrelation_plot(self):
    from pandas.plotting import autocorrelation_plot
    _check_plot_works(autocorrelation_plot, series=self.ts)
    _check_plot_works(autocorrelation_plot, series=self.ts.values)

    ax = autocorrelation_plot(self.ts, label='Test')
    self._check_legend_labels(ax, labels=['Test'])
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)

single.adj_nav.plot(ax=ax1, sharex=True)
# ylim = (0, 14)
pplt.table(ax=ax2,
           data=np.round(single.rate.describe().T, 2),
           loc='upper right',
           colWidths=[0.2, 0.2, 0.2],
           edges='open',
           cellLoc='center',
           colLoc='center',
           rowLoc='center',
           in_layout=False)
single.rate.plot.kde(ax=ax2, sharex=True)
single.rate.plot.hist(ax=ax2, sharex=True, secondary_y=True, alpha=0.5, bins=50)
single.rate.plot(ax=ax3)
pplt.autocorrelation_plot(single.rate, ax=ax4)
plt.show()

profile = single(title='Titanic Dataset')
profile.to_file(output_file='result/titanic_report.html')
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# In[32]:
fig_first = plot_acf(df["Milk First Difference"].dropna())

# In[33]:
fig_seasonal_first = plot_acf(df["Seasonal First Difference"].dropna())

# In[34]:
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['Seasonal First Difference'].dropna())

# In[35]:
result = plot_pacf(df["Seasonal First Difference"].dropna())

# In[36]:
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(df['Seasonal First Difference'].iloc[13:],
                               lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(df['Seasonal First Difference'].iloc[13:],
                                lags=40, ax=ax2)
df.Server.value_counts()[:25]

# ds.to_csv("data/VirtualOneServer.csv")
# ds1.to_csv("data/VirtualFormattedonseXerver.csv")
# df.to_csv("data/virtualMemoryData.csv")
# plt.plot(df[["Time", "MemoryUsed"]])
# plt.show()

from pandas.plotting import autocorrelation_plot
from matplotlib.pyplot import figure

figure(num=None, figsize=(20, 12), dpi=80, facecolor='w', edgecolor='k')
autocorrelation_plot(ds["MemoryUsed"])
plt.show()

figure(num=None, figsize=(20, 12), dpi=80, facecolor='w', edgecolor='k')
plt.plot(y_test)
plt.show()

rmse_val = []
for K in range(100):
    K = K + 1
    model = KNeighborsRegressor(n_neighbors=K)
    model.fit(X_train, y_train)  # fit the model
    pred = model.predict(X_test)  # make prediction on test set
    error = np.sqrt(mse(y_test, pred))  # calculate rmse
    rmse_val.append(error)  # store the error for this K
    ,[Close]
FROM [NSEData].[dbo].[IndexHistory]
where [Index Name] = 'NIFTY 50'
'''
df = pd.read_sql_query(sql, engine)
df.index = df['Date'].apply(pd.to_datetime)
del df['Date']
df = df.fillna(df.bfill())
df = df['Close'].resample('MS').mean()
df.plot()
plt.show()

autocorrelation_plot(df)
plt.show()

quantity = df.values
size = int(len(quantity) * 0.66)
train, test = quantity[0:size], quantity[size:len(quantity)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(5, 2, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat[0])
    obs = test[t]
    history.append(obs)
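# Common follow-up (a sketch, not part of the original excerpt): score the
# walk-forward forecasts above and plot them against the held-out observations.
from sklearn.metrics import mean_squared_error

error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
plt.plot(test)
plt.plot(predictions, color='red')
plt.show()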
df_test = pd.read_csv(
    'ElectricDemandForecasting-DL-master_data_hourly_20140102_20191101_test.csv',
    parse_dates=['datetime'],
    date_parser=dateparse,
    index_col=1)

# convert to datetime
df_test['datetime'] = pd.to_datetime(df_test['datetime'], utc=True)
df_train['datetime'] = pd.to_datetime(df_train['datetime'], utc=True)
df_test = df_test.set_index('datetime')
df_train = df_train.set_index('datetime')

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df_test)
pyplot.show()

from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(df_test, order=(1, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())
print(df['electricity_available'].shift(1))

df['Seasonal First Difference'] = df['electricity_available'] - df[
    'electricity_available'].shift(12)
print(df.head(14))

## Again test Dickey Fuller test
adfuller_test(df['Seasonal First Difference'].dropna())

# plt.plot(df['Seasonal First Difference'])
# plt.show()

# Correlations
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['electricity_available'])
# plt.show()

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = plot_acf(df['Seasonal First Difference'].iloc[13:], lags=40, ax=ax1)
# plt.show()
ax2 = fig.add_subplot(212)
fig = plot_pacf(df['Seasonal First Difference'].iloc[13:], lags=40, ax=ax2)
# plt.show()
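# Note (a sketch, not in the original): subtracting shift(12) is the seasonal
# first difference, which Series.diff expresses directly via its lag argument.
df['Seasonal First Difference'] = df['electricity_available'].diff(12)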
plot_series(time, series)
plt.plot(time, signal, "k-")
plt.show()

signal = impulses(time, 10, seed=42)
series = autocorrelation(signal, {1: 0.70, 50: 0.2})
plot_series(time, series)
plt.plot(time, signal, "k-")
plt.show()

series_diff1 = series[1:] - series[:-1]
plot_series(time[1:], series_diff1)

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(series)

from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

import pandas as pd

df = pd.read_csv("sunspots.csv", parse_dates=["Date"], index_col="Date")
series = df["Monthly Mean Total Sunspot Number"].asfreq("1M")
series.head()

series.plot(figsize=(12, 5))
series["1995-01-01":].plot()
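# `impulses` and `autocorrelation` above are helpers defined elsewhere in the
# source notebook. A plausible sketch of their behavior (an assumption, not
# the original code): impulses scatters random spikes over the time axis, and
# autocorrelation feeds back each past value scaled by the lag -> coefficient
# dict, so {1: 0.70, 50: 0.2} echoes the signal at lags 1 and 50.
import numpy as np

def impulses(time, num_impulses, amplitude=1, seed=None):
    rnd = np.random.RandomState(seed)
    series = np.zeros(len(time))
    for loc in rnd.randint(len(time), size=num_impulses):
        series[loc] += rnd.rand() * amplitude
    return series

def autocorrelation(source, phis):
    ar = source.copy()
    for step in range(len(source)):
        for lag, phi in phis.items():
            if step >= lag:
                ar[step] += phi * ar[step - lag]
    return ar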
df['seasonal diff'].plot()
adf_check(df['seasonal diff'].dropna())

# seasonal first difference
df['seasonal first diff'] = df['first diff'] - df['first diff'].shift(12)
df['seasonal first diff'].plot()
adf_check(df['seasonal first diff'].dropna())

# autocorrelation plots
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
fig_first = plot_acf(df['first diff'].dropna())
fig_seasonal_first = plot_acf(df['seasonal first diff'].dropna())

# can also be done in pandas
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['seasonal first diff'].dropna())

# partial autocorrelation plots
result = plot_pacf(df['seasonal first diff'].dropna())
plot_acf(df['seasonal first diff'].dropna())
plot_pacf(df['seasonal first diff'].dropna())

# actually building the ARIMA model
from statsmodels.tsa.arima_model import ARIMA

# seasonal model
model = sm.tsa.statespace.SARIMAX(df['pounds'],
                                  order=(0, 1, 0),
                                  seasonal_order=(1, 1, 1, 12))
results = model.fit()
print(results.summary())
results.resid
results.resid.plot()
def main():
    print('loading data')
    data = load_data('aapl.us.txt')
    data.plot()
    plt.title('data before differencing')
    plt.show()

    test_stationarity(data['open'])
    first_difference = apply_differencing(data)
    test_stationarity(first_difference['open'])
    first_difference.plot()
    plt.title('data after differencing')
    plt.show()

    autocorrelation_plot(data)
    plt.title('autocorrelation of data')
    plt.show()
    autocorrelation_plot(first_difference)
    plt.title('autocorrelation of data after differencing')
    plt.show()

    random_array = pd.DataFrame(
        np.random.rand(1, len(first_difference.index))).transpose()
    autocorrelation_plot(random_array)
    plt.title('autocorrelation of random number array')
    plt.show()

    print('fitting ARIMA model')
    # result = seasonal_decompose(data, model='multiplicative', freq=6)
    # result.plot()
    # plt.show()
    # print('residuals 5 number summary')
    # print(result.resid.describe())

    train = data.loc['2007-01-01':'2016-12-31']
    test = data.loc['2017-01-01':]
    stepwise_model = auto_arima(train, start_p=10, start_q=0,
                                max_p=1000, max_q=1000, max_d=1000,
                                max_order=1000, m=365, start_P=0,
                                seasonal=False, d=1, D=1, trace=True,
                                suppress_warnings=True, stepwise=True)
    print('final arima order \n \n')
    print(stepwise_model.order)

    stepwise_model.fit(train)
    future_forecast = stepwise_model.predict(n_periods=len(test['open']))
    # print(future_forecast)
    future_forecast = pd.DataFrame(future_forecast,
                                   index=test.index,
                                   columns=['Prediction'])
    future_forecast.plot(title='forecast for 2017')
    plt.show()
    test.plot(title='actual data for 2017')
    plt.show()

    prediction_error = pd.DataFrame(
        (future_forecast['Prediction'] - test['open'])**2, index=test.index)
    prediction_error_monthly = (prediction_error.groupby(
        pd.Grouper(freq='M')).mean())**0.5
    prediction_error_monthly.plot(title='Monthly mean square error')
    plt.show()
model = VAR(endog=arsi_d)
model_fit = model.fit()
yhat = model_fit.forecast(model_fit.y, steps=1)
print(yhat)

#%%
#%%
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from pandas.plotting import autocorrelation_plot

# ARIMA
NDVI = arsi.NDVI
autocorrelation_plot(NDVI)

size = int(len(NDVI) * 0.7)
train, test = NDVI[0:size], NDVI[size:len(NDVI)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(5, 0, 2))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
tsa_plots.plot_acf(Walmart.Footfalls, lags=12)
tsa_plots.plot_pacf(Walmart.Footfalls, lags=12)

model1 = ARIMA(Walmart.Footfalls, order=(12, 1, 1)).fit(disp=0)
model2 = ARIMA(Walmart.Footfalls, order=(1, 1, 5)).fit(disp=0)
model1.aic
model2.aic

p = 1
q = 0
d = 1
pdq = []
aic = []
for q in range(7):
    model = ARIMA(Walmart.Footfalls, order=(p, d, q)).fit(disp=0)
    x = model.aic
    x1 = p, d, q
    aic.append(x)
    pdq.append(x1)
keys = pdq
values = aic
d = dict(zip(keys, values))
print(d)

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(Walmart.Footfalls)
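# With the order -> AIC dictionary built above, the lowest-AIC order can be
# read out directly (a sketch, not part of the original):
best_order = min(d, key=d.get)
print('best (p, d, q) by AIC:', best_order, '->', d[best_order])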
# In[8]:
# Visualize autocorrelation of Bitcoin prices
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf

fig, ax = plt.subplots(1, 3, figsize=(21, 6))
a0 = lag_plot(BTC, ax=ax[0])
ax[0].set_title('Lag 1 Autocorrelation Scatterplot')
a1 = plot_acf(BTC, lags=30, ax=ax[1])
ax[1].set_title('Autocorrelation over 30 lags')
ax[1].set_ylabel('Autocorrelation')
ax[1].set_xlabel('Lag')
a2 = autocorrelation_plot(BTC, ax=ax[2])
ax[2].set_title('Autocorrelation over all lags')
plt.show()

# As we can see from the above plots, the daily closing prices of Bitcoin are
# heavily correlated. Our model should take this into account. Now we should
# check the stationarity of the series before building our model. We will do
# this both by plotting and by running statistical tests.

# In[9]:
# Take first differences
df_example_diff = df_example.diff()
df_example_diff = df_example_diff.dropna()

# Plot values
fig, ax = plt.subplots(1, 3, figsize=(21, 6))
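# The "statistical tests" promised above can be a minimal augmented
# Dickey-Fuller check (a sketch, not in the original; assumes BTC squeezes to
# a one-dimensional series):
from statsmodels.tsa.stattools import adfuller

adf_stat, p_value = adfuller(BTC.squeeze().dropna())[:2]
print('ADF statistic: %.3f, p-value: %.3f' % (adf_stat, p_value))
# A p-value above 0.05 fails to reject the unit-root null, i.e. the series is
# likely non-stationary and differencing is warranted.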
print('Now check the autocorrelation. How much information can be derived '
      'from the signal to predict each value.')

from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot

values = DataFrame(signals.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t-1', 't+1']
result = dataframe.corr()
print(result)

from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_squared_error

autocorrelation_plot(signals)
pyplot.show()

# In[]
print('Check the default model.')
# create lagged dataset
values = DataFrame(signals.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t-1', 't+1']
# split into train and test sets
X = dataframe.values
train, test = X[1:len(X) - 7], X[len(X) - 7:]
train_X, train_y = train[:, 0], train[:, 1]
test_X, test_y = test[:, 0], test[:, 1]
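# The lagged split above is the setup for a persistence (naive) baseline;
# a sketch of the usual scoring step (not part of the original excerpt):
predictions = [x for x in test_X]  # predict t+1 with the value at t-1
rmse = mean_squared_error(test_y, predictions) ** 0.5
print('Persistence RMSE: %.3f' % rmse)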
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

from pandas.plotting import autocorrelation_plot

x = df["diet"].astype(float)
autocorrelation_plot(x)
plt.show()

from statsmodels.tsa.stattools import acf

x_diff = x.diff().dropna()  # first item is NA
lag_acf = acf(x_diff, nlags=36)
plt.plot(lag_acf)
plt.title('Autocorrelation Function')
plt.show()

from statsmodels.tsa.stattools import acf, pacf

x = df["gym"].astype(float)
x_diff = x.diff().dropna()  # first item is NA
        return data.loc[data.index.get_level_values(0) == userId].copy()
    except KeyError:
        print('User', userId, 'does not exist.')

lista = []
df = pd.read_pickle('./pkl/dataset.pkl')
for i in df.index.get_level_values(0).drop_duplicates():
    dfu = get_user_data(df, i).droplevel(0).loc[:, 'slevel']
    idx = pd.date_range('2013-03-27 04:00:00', '2013-06-01 3:00:00', freq='h')
    d = pd.DataFrame(index=idx)
    d['slevel'] = dfu
    a = d.isna().sum()
    lista.append(a)
    nulls = d.isna()

    plt.close()
    plt.scatter(list(range(len(idx))), nulls, 0.1, marker='x')
    plt.show()

    d.ffill(inplace=True)

    # Autocorrelation plot
    plt.close()
    plt.rcParams.update({'figure.figsize': (9, 5), 'figure.dpi': 120})
    autocorrelation_plot(d.slevel)
    plt.title('{0},{1}'.format(str(i), a))
    plt.show()

    plot_by_week(i)
    print(a)
# integer encode direction
encoder = LabelEncoder()
for categor_variable in range(4, 7):
    values[:, categor_variable] = encoder.fit_transform(
        values[:, categor_variable])
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[7, 8, 9, 11, 12, 13]], axis=1, inplace=True)
print(reframed.head())

autocorrelation_plot(dataset['energy'].dropna())
pyplot.xlim(0, 24)
pyplot.title('Auto-correlation of hourly demand over a 24 hour period')
pyplot.show()

reframed.columns = ['windSpeed', 'humidity', 'temperature', 'energy_1',
                    'hour', 'dayofweek', 'month', 'energy']
reframed.head()

X = reframed.values[:, 0:7]
Y = reframed.values[:, 7]
tscv = TimeSeriesSplit(n_splits=3)
print(tscv)
for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
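# `series_to_supervised` used above is not defined in this excerpt; the widely
# circulated reference implementation it appears to follow (an assumption) is:
from pandas import DataFrame, concat

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised learning dataset."""
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = [], []
    # input sequence (t-n, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ..., t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values introduced by the shifting
    if dropnan:
        agg.dropna(inplace=True)
    return agg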
# Series decomposition
Quantidade['Aumento'] = Quantidade['Quantity'].diff()
GraficoLinhas(Quantidade["Aumento"], "Série", "Aumento", "Tempo")

Quantidade['Aceleracao'] = Quantidade['Aumento'].diff()
GraficoLinhas(Quantidade["Aceleracao"], "Série", "Aceleracao", "Tempo")

# Plot the 3 charts together
Componentes(
    [Quantidade["Quantity"], Quantidade["Aumento"], Quantidade["Aceleracao"]],
    ['Serie', 'Aumento', 'Aceleracao'],
    'Analise da serie temporal')

# ------------------------------------------------------------------------------
# Autocorrelation analysis
ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Quantity"])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
plt.ylabel('Autocorrelacao', fontsize=16)
plt.xlabel('Tempo')

ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Aumento"][1:])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
plt.ylabel('Autocorrelacao', fontsize=16)
plt.xlabel('Tempo')

ax = plt.figure(figsize=(16, 12))
autocorrelation_plot(Quantidade["Aumento"][2:])
plt.title('Grafico de autocorrelacao', fontsize=18, loc='left')
def drawAutoCorr(df, col):
    autocorrelation_plot(df[col])
    plt.show()
    index=albany['Year'].to_list())

#%%
from matplotlib import pyplot
from pandas.plotting import lag_plot

lag_plot(ab)
pyplot.show()

#%%
# Here we are trying to find a correlation between the lagged values
values = pd.DataFrame(ab.values)
dataframe = pd.concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']
result = dataframe.corr()
print(result)

#%%
from pandas.plotting import autocorrelation_plot

autocorrelation_plot(ab)
pyplot.show()

#%%
from statsmodels.graphics.tsaplots import plot_acf

plot_acf(ab, lags=31)
pyplot.show()

#%%
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from math import sqrt

values = DataFrame(ab.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']
# Figure 8.2
f, axarr = plot.subplots(3, 2)
f.subplots_adjust(hspace=0.5)
f.subplots_adjust(wspace=0.5)

random.seed(0)
random_time_series = pd.DataFrame(np.random.normal(0.1, 2, 5200),
                                  index=range(0, 5200),
                                  columns=['value'])
axarr[0, 0].plot(random_time_series.index, random_time_series['value'])
axarr[0, 0].set_xlim([101, 500])
axarr[0, 0].set_ylim([-10, 10])
axarr[0, 0].set_xlabel('time')
axarr[0, 0].set_ylabel('value')
autocorrelation_plot(random_time_series['value'], ax=axarr[0, 1])
axarr[0, 1].set_xlim([0, 30])
axarr[0, 1].set_ylim([-1.1, 1.1])

# rolling_window_data = pd.rolling_mean(random_time_series['value'], 10)
rolling_window_data = pd.Series(
    random_time_series['value']).rolling(window=10).mean()
axarr[1, 0].plot(random_time_series.index, rolling_window_data)
axarr[1, 0].set_xlim([101, 500])
axarr[1, 0].set_ylim([-10, 10])
axarr[1, 0].set_xlabel('time')
axarr[1, 0].set_ylabel('value')
autocorrelation_plot(rolling_window_data[10:], ax=axarr[1, 1])
axarr[1, 1].set_xlim([0, 30])
axarr[1, 1].set_ylim([-1.1, 1.1])
# Why is it a NaN?
diff_data = diff_data.dropna()
# If we don't do this, what will happen in the following code?

# Plot our data
plt.plot(data)
plt.figure()
plt.plot(diff_data)

# Plot the ACF for the data. This call opens a new plot.
smt.graphics.tsa.plot_acf(data, lags=30, alpha=0.05)
# lags=30 means drawing 30 lags.
# With alpha=0.05, 95% confidence intervals are returned, where the standard
# deviation is computed according to Bartlett's formula.
# You may change 0.05 to other values for alpha to see what will happen.

# Plot the PACF for the data. This call opens a new plot.
# smt.graphics.tsa.plot_pacf(data, lags=30, alpha=0.05)

# For the differenced time series
smt.graphics.tsa.plot_acf(diff_data, lags=30, alpha=0.05)
# plt.figure()
# smt.graphics.tsa.plot_pacf(diff_data, lags=30, alpha=0.05)

# Use pandas for the ACF; pandas has no plot function for the PACF
plt.figure()  # We need to prepare the figure
autocorrelation_plot(data)
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import sts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import autocorrelation_plot

tf.set_random_seed(666)

dataframe = pd.read_csv('international-airline-passengers.csv',
                        usecols=[1],
                        engine='python',
                        skipfooter=3)
dataset = dataframe.values
dataset = np.array(dataset.astype('float32'))

autocorrelation_plot(dataset)

corr = []
for i in range(0, len(dataset)):
    # print(i, pd.Series(dataset.T[0]).autocorr(lag=i))
    corr.append(pd.Series(dataset.T[0]).autocorr(lag=i))
janela = (np.where(corr[1:-2] == np.max(corr[1:-2]))[0] + 1)[0]

X0 = dataset[0:-12]
Y0 = dataset[-12:]

def build_model(observed_time_series):
    trend = sts.LocalLinearTrend(observed_time_series=observed_time_series)
    seasonal = tfp.sts.Seasonal(
        num_seasons=int(len(dataset) / janela),
        observed_time_series=observed_time_series)
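# The loop above picks the lag with the strongest autocorrelation as the
# seasonal window `janela`. np.argmax expresses the same selection more
# directly (a sketch, not part of the original):
janela = int(np.argmax(corr[1:-2])) + 1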
def plot_autocorrelation(self):
    # Plot autocorrelation
    autocorrelation_plot(self.series)
    plt.show()
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA

def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')

series = read_csv('sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)
# print(series.head())
# series.plot()

autocorrelation_plot(series)
pyplot.show()

"""# fit model
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())
"""
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot

def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')

series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)
autocorrelation_plot(series)
pyplot.show()
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import mysql.connector as sql
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
from pandas import DataFrame

db_connection = sql.connect(host='18.206.99.175',
                            database='market_data',
                            user='******',
                            password='******')
dados = pd.read_sql(
    'SELECT datetime_buss, price FROM mining_trade '
    'WHERE active = "WINQ19" AND date(datetime_buss) = date("2019-07-30")',
    con=db_connection,
    index_col='datetime_buss')

autocorrelation_plot(dados)

# fit model
model = ARIMA(dados, order=(3600, 1, 21))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors
residuals = DataFrame(model_fit.resid)
residuals.plot()
plt.show()
residuals.plot(kind='kde')
plt.show()
print(residuals.describe())
def plot(input_ts='-',
         columns=None,
         start_date=None,
         end_date=None,
         clean=False,
         skiprows=None,
         index_type='datetime',
         names=None,
         ofilename='plot.png',
         type='time',
         xtitle='',
         ytitle='',
         title='',
         figsize='10,6.0',
         legend=None,
         legend_names=None,
         subplots=False,
         sharex=True,
         sharey=False,
         colors='auto',
         linestyles='auto',
         markerstyles=' ',
         style='auto',
         logx=False,
         logy=False,
         xaxis='arithmetic',
         yaxis='arithmetic',
         xlim=None,
         ylim=None,
         secondary_y=False,
         mark_right=True,
         scatter_matrix_diagonal='kde',
         bootstrap_size=50,
         bootstrap_samples=500,
         norm_xaxis=False,
         norm_yaxis=False,
         lognorm_xaxis=False,
         lognorm_yaxis=False,
         xy_match_line='',
         grid=False,
         label_rotation=None,
         label_skip=1,
         force_freq=None,
         drawstyle='default',
         por=False,
         invert_xaxis=False,
         invert_yaxis=False,
         round_index=None,
         plotting_position='weibull',
         source_units=None,
         target_units=None,
         lag_plot_lag=1):
    r"""Plot data."""
    # Need to work around some old option defaults with the implementation of
    # mando
    legend = bool(legend == '' or legend == 'True' or legend is None)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FixedLocator

    tsd = tsutils.common_kwds(tsutils.read_iso_ts(input_ts,
                                                  skiprows=skiprows,
                                                  names=names,
                                                  index_type=index_type),
                              start_date=start_date,
                              end_date=end_date,
                              pick=columns,
                              round_index=round_index,
                              dropna='all',
                              source_units=source_units,
                              target_units=target_units,
                              clean=clean)

    if type in ['bootstrap', 'heatmap', 'autocorrelation', 'lag_plot']:
        if len(tsd.columns) != 1:
            raise ValueError("""
*
*   The '{1}' plot can only work with 1 time-series in the DataFrame.
*   The DataFrame that you supplied has {0} time-series.
*
""".format(len(tsd.columns), type))

    if por is True:
        tsd = tsutils.common_kwds(tsutils.read_iso_ts(tsd),
                                  start_date=start_date,
                                  end_date=end_date,
                                  round_index=round_index,
                                  dropna='no')

    # This is to help pretty print the frequency
    try:
        try:
            pltfreq = str(tsd.index.freq, 'utf-8').lower()
        except TypeError:
            pltfreq = str(tsd.index.freq).lower()
        if pltfreq.split(' ')[0][1:] == '1':
            beginstr = 3
        else:
            beginstr = 1
        if pltfreq == 'none':
            short_freq = ''
        else:
            # short freq string (day) OR (2 day)
            short_freq = '({0})'.format(pltfreq[beginstr:-1])
    except AttributeError:
        short_freq = ''

    if legend_names:
        lnames = tsutils.make_list(legend_names)
        if len(lnames) != len(set(lnames)):
            raise ValueError("""
*
*   Each name in legend_names must be unique.
*
""")
        if len(tsd.columns) == len(lnames):
            renamedict = dict(list(zip(tsd.columns, lnames)))
        elif type == 'xy' and len(tsd.columns) // 2 == len(lnames):
            renamedict = dict(list(zip(tsd.columns[2::2], lnames[1:])))
            renamedict[tsd.columns[1]] = lnames[0]
        else:
            raise ValueError("""
*
*   For 'legend_names' you must have the same number of comma
*   separated names as columns in the input data.  The input
*   data has {0} where the number of 'legend_names' is {1}.
*
*   If 'xy' type you need to have legend names as x,y1,y2,y3,...
*
""".format(len(tsd.columns), len(lnames)))
        tsd.rename(columns=renamedict, inplace=True)
    else:
        lnames = tsd.columns

    if colors == 'auto':
        colors = color_list
    else:
        colors = tsutils.make_list(colors)

    if linestyles == 'auto':
        linestyles = line_list
    else:
        linestyles = tsutils.make_list(linestyles)

    if markerstyles == 'auto':
        markerstyles = marker_list
    else:
        markerstyles = tsutils.make_list(markerstyles)
        if markerstyles is None:
            markerstyles = ' '

    if style != 'auto':
        nstyle = tsutils.make_list(style)
        if len(nstyle) != len(tsd.columns):
            raise ValueError("""
*
*   You have to have the same number of style strings as time-series to plot.
*   You supplied '{0}' for style which has {1} style strings,
*   but you have {2} time-series.
*
""".format(style, len(nstyle), len(tsd.columns)))
        colors = []
        markerstyles = []
        linestyles = []
        for st in nstyle:
            colors.append(st[0])
            if len(st) == 1:
                markerstyles.append(' ')
                linestyles.append('-')
                continue
            if st[1] in marker_list:
                markerstyles.append(st[1])
                try:
                    linestyles.append(st[2:])
                except IndexError:
                    linestyles.append(' ')
            else:
                markerstyles.append(' ')
                linestyles.append(st[1:])
    if linestyles is None:
        linestyles = [' ']
    else:
        linestyles = [' ' if i == ' ' else i for i in linestyles]
    markerstyles = [' ' if i is None else i for i in markerstyles]

    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    style = ['{0}{1}{2}'.format(next(icolors),
                                next(imarkerstyles),
                                next(ilinestyles))
             for i in list(range(len(tsd.columns)))]

    # reset to beginning of iterator
    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    if (logx is True or
            logy is True or
            norm_xaxis is True or
            norm_yaxis is True or
            lognorm_xaxis is True or
            lognorm_yaxis is True):
        warnings.warn("""
*
*   The --logx, --logy, --norm_xaxis, --norm_yaxis, --lognorm_xaxis, and
*   --lognorm_yaxis options are deprecated.
*
*   For --logx use --xaxis="log"
*   For --logy use --yaxis="log"
*   For --norm_xaxis use --type="norm_xaxis"
*   For --norm_yaxis use --type="norm_yaxis"
*   For --lognorm_xaxis use --type="lognorm_xaxis"
*   For --lognorm_yaxis use --type="lognorm_yaxis"
*
""")

    if xaxis == 'log':
        logx = True
    if yaxis == 'log':
        logy = True

    if type in ['norm_xaxis', 'lognorm_xaxis', 'weibull_xaxis']:
        xaxis = 'normal'
        if logx is True:
            logx = False
            warnings.warn("""
*
*   The --type={1} cannot also have the xaxis set to {0}.
*   The {0} setting for xaxis is ignored.
*
""".format(xaxis, type))

    if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
        yaxis = 'normal'
        if logy is True:
            logy = False
            warnings.warn("""
*
*   The --type={1} cannot also have the yaxis set to {0}.
*   The {0} setting for yaxis is ignored.
*
""".format(yaxis, type))

    xlim = _know_your_limits(xlim, axis=xaxis)
    ylim = _know_your_limits(ylim, axis=yaxis)

    figsize = tsutils.make_list(figsize)

    if not isinstance(tsd.index, pd.DatetimeIndex):
        tsd.insert(0, tsd.index.name, tsd.index)

    if type in ['xy', 'double_mass']:
        if tsd.shape[1] % 2 != 0:
            raise AttributeError("""
*
*   The 'xy' and 'double_mass' types must have an even number of columns
*   arranged as x,y pairs.  You supplied {0} columns.
*
""".format(tsd.shape[1]))
        colcnt = tsd.shape[1] // 2
    elif type in ['norm_xaxis', 'norm_yaxis',
                  'lognorm_xaxis', 'lognorm_yaxis',
                  'weibull_xaxis', 'weibull_yaxis']:
        colcnt = tsd.shape[1]

    if type in ['xy', 'double_mass',
                'norm_xaxis', 'norm_yaxis',
                'lognorm_xaxis', 'lognorm_yaxis',
                'weibull_xaxis', 'weibull_yaxis',
                'heatmap']:
        _, ax = plt.subplots(figsize=figsize)
        plotdict = {(False, True): ax.semilogy,
                    (True, False): ax.semilogx,
                    (True, True): ax.loglog,
                    (False, False): ax.plot}

    if type == 'time':
        ax = tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                      sharey=sharey, style=None, logx=logx, logy=logy,
                      xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      mark_right=mark_right, figsize=figsize,
                      drawstyle=drawstyle)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        if legend is True:
            plt.legend(loc='best')
    elif type in ['taylor']:
        from ..skill_metrics import centered_rms_dev
        from ..skill_metrics import taylor_diagram
        ref = tsd.iloc[:, 0]
        std = [pd.np.std(ref)]
        ccoef = [1.0]
        crmsd = [0.0]
        for col in range(1, len(tsd.columns)):
            std.append(pd.np.std(tsd.iloc[:, col]))
            ccoef.append(pd.np.corrcoef(tsd.iloc[:, col], ref)[0][1])
            crmsd.append(centered_rms_dev(tsd.iloc[:, col].values,
                                          ref.values))
        taylor_diagram(pd.np.array(std),
                       pd.np.array(crmsd),
                       pd.np.array(ccoef))
    elif type in ['target']:
        from ..skill_metrics import centered_rms_dev
        from ..skill_metrics import rmsd
        from ..skill_metrics import bias
        from ..skill_metrics import target_diagram
        biases = []
        rmsds = []
        crmsds = []
        ref = tsd.iloc[:, 0].values
        for col in range(1, len(tsd.columns)):
            biases.append(bias(tsd.iloc[:, col].values, ref))
            crmsds.append(centered_rms_dev(tsd.iloc[:, col].values, ref))
            rmsds.append(rmsd(tsd.iloc[:, col].values, ref))
        target_diagram(pd.np.array(biases),
                       pd.np.array(crmsds),
                       pd.np.array(rmsds))
    elif type in ['xy', 'double_mass']:
        # PANDAS was not doing the right thing with xy plots
        # if you wanted lines between markers.
        # Fell back to using raw matplotlib.
        # Boy I do not like matplotlib.
        for colindex in range(colcnt):
            ndf = tsd.iloc[:, colindex * 2:colindex * 2 + 2]
            if type == 'double_mass':
                ndf = ndf.dropna().cumsum()
            oxdata = pd.np.array(ndf.iloc[:, 0])
            oydata = pd.np.array(ndf.iloc[:, 1])
            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        if legend is True:
            ax.legend(loc='best')
        if type == 'double_mass':
            xtitle = xtitle or 'Cumulative {0}'.format(tsd.columns[0])
            ytitle = ytitle or 'Cumulative {0}'.format(tsd.columns[1])
    elif type in ['norm_xaxis', 'norm_yaxis',
                  'lognorm_xaxis', 'lognorm_yaxis',
                  'weibull_xaxis', 'weibull_yaxis']:
        ppf = tsutils.set_ppf(type.split('_')[0])
        ys = tsd.iloc[:, :]
        for colindex in range(colcnt):
            oydata = pd.np.array(ys.iloc[:, colindex].dropna())
            oydata = pd.np.sort(oydata)[::-1]
            n = len(oydata)
            norm_axis = ax.xaxis
            oxdata = ppf(tsutils.set_plotting_position(n, plotting_position))
            if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
                oxdata, oydata = oydata, oxdata
                norm_axis = ax.yaxis
            plotdict[(logx, logy)](oxdata,
                                   oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)
        # Make it pretty
        xtmaj = pd.np.array([0.01, 0.1, 0.5, 0.9, 0.99])
        xtmaj_str = ['1', '10', '50', '90', '99']
        xtmin = pd.np.concatenate([pd.np.linspace(0.001, 0.01, 10),
                                   pd.np.linspace(0.01, 0.1, 10),
                                   pd.np.linspace(0.1, 0.9, 9),
                                   pd.np.linspace(0.9, 0.99, 10),
                                   pd.np.linspace(0.99, 0.999, 10)])
        xtmaj = ppf(xtmaj)
        xtmin = ppf(xtmin)

        norm_axis.set_major_locator(FixedLocator(xtmaj))
        norm_axis.set_minor_locator(FixedLocator(xtmin))

        if type in ['norm_xaxis', 'lognorm_xaxis', 'weibull_xaxis']:
            ax.set_xticklabels(xtmaj_str)
            ax.set_ylim(ylim)
            ax.set_xlim(ppf(xlim))
        elif type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
            ax.set_yticklabels(xtmaj_str)
            ax.set_xlim(xlim)
            ax.set_ylim(ppf(ylim))

        if type in ['norm_xaxis', 'norm_yaxis']:
            xtitle = xtitle or 'Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['lognorm_xaxis', 'lognorm_yaxis']:
            xtitle = xtitle or 'Log Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['weibull_xaxis', 'weibull_yaxis']:
            xtitle = xtitle or 'Weibull Distribution'
            ytitle = ytitle or tsd.columns[0]

        if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
            xtitle, ytitle = ytitle, xtitle

        if legend is True:
            ax.legend(loc='best')
    elif type in ['kde', 'probability_density']:
        ax = tsd.plot(kind='kde', legend=legend, subplots=subplots,
                      sharex=sharex, sharey=sharey, style=None, logx=logx,
                      logy=logy, xlim=xlim, ylim=ylim,
                      secondary_y=secondary_y, figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        ytitle = ytitle or 'Density'
        if legend is True:
            plt.legend(loc='best')
    elif type == 'kde_time':
        from scipy.stats.kde import gaussian_kde
        _, (ax0, ax1) = plt.subplots(nrows=1,
                                     ncols=2,
                                     sharey=True,
                                     figsize=figsize,
                                     gridspec_kw={'width_ratios': [1, 4]})
        tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                 sharey=sharey, style=None, logx=logx, logy=logy,
                 xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                 mark_right=mark_right, figsize=figsize,
                 drawstyle=drawstyle, ax=ax1)
        for index, line in enumerate(ax1.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        ylimits = ax1.get_ylim()
        ny = pd.np.linspace(ylimits[0], ylimits[1], 1000)
        for col in range(len(tsd.columns)):
            xvals = tsd.iloc[:, col].dropna().values
            pdf = gaussian_kde(xvals)
            ax0.plot(pdf(ny),
                     ny,
                     linestyle=style[col][2:],
                     color=style[col][0],
                     marker=style[col][1],
                     label=tsd.columns[col],
                     drawstyle=drawstyle)
        ax0.set(xlabel='Probability Density', ylabel=ytitle)
    elif type == 'boxplot':
        tsd.boxplot(figsize=figsize)
    elif type == 'scatter_matrix':
        from pandas.plotting import scatter_matrix
        if scatter_matrix_diagonal == 'probablity_density':
            scatter_matrix_diagonal = 'kde'
        scatter_matrix(tsd,
                       diagonal=scatter_matrix_diagonal,
                       figsize=figsize)
    elif type == 'lag_plot':
        from pandas.plotting import lag_plot
        lag_plot(tsd, lag=lag_plot_lag)
        xtitle = xtitle or 'y(t)'
        ytitle = ytitle or 'y(t+{0})'.format(short_freq or 1)
    elif type == 'autocorrelation':
        from pandas.plotting import autocorrelation_plot
        autocorrelation_plot(tsd)
        xtitle = xtitle or 'Time Lag {0}'.format(short_freq)
    elif type == 'bootstrap':
        from pandas.plotting import bootstrap_plot
        bootstrap_plot(tsd,
                       size=bootstrap_size,
                       samples=bootstrap_samples,
                       color='gray')
    elif type == 'heatmap':
        # Find beginning and end years
        byear = tsd.index[0].year
        eyear = tsd.index[-1].year
        tsd = tsutils.asbestfreq(tsd)
        if tsd.index.freqstr != 'D':
            raise ValueError("""
*
*   The "heatmap" plot type can only work with daily time series.
*
""")
        dr = pd.date_range('{0}-01-01'.format(byear),
                           '{0}-12-31'.format(eyear),
                           freq='D')
        ntsd = tsd.reindex(index=dr)
        groups = ntsd.iloc[:, 0].groupby(pd.TimeGrouper('A'))
        years = pd.DataFrame()
        for name, group in groups:
            ngroup = group.values
            if len(group.values) == 365:
                ngroup = pd.np.append(group.values, [pd.np.nan])
            years[name.year] = ngroup
        years = years.T
        plt.imshow(years,
                   interpolation=None,
                   aspect='auto')
        plt.colorbar()
        yticks = list(range(byear, eyear + 1))
        skip = len(yticks) // 20 + 1
        plt.yticks(range(0, len(yticks), skip), yticks[::skip])
        mnths = [0, 30, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
        mnths_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        plt.xticks(mnths, mnths_labels)
        grid = False
    elif (type == 'bar' or
          type == 'bar_stacked' or
          type == 'barh' or
          type == 'barh_stacked'):
        stacked = False
        if type[-7:] == 'stacked':
            stacked = True
        kind = 'bar'
        if type[:4] == 'barh':
            kind = 'barh'
        ax = tsd.plot(kind=kind, legend=legend, stacked=stacked,
                      style=style, logx=logx, logy=logy, xlim=xlim,
                      ylim=ylim, figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        freq = tsutils.asbestfreq(tsd, force_freq=force_freq).index.freqstr
        if freq is not None:
            if 'A' in freq:
                endchar = 4
            elif 'M' in freq:
                endchar = 7
            elif 'D' in freq:
                endchar = 10
            elif 'H' in freq:
                endchar = 13
            else:
                endchar = None
            nticklabels = []
            if kind == 'bar':
                taxis = ax.xaxis
            else:
                taxis = ax.yaxis
            for index, i in enumerate(taxis.get_majorticklabels()):
                if index % label_skip:
                    nticklabels.append(' ')
                else:
                    nticklabels.append(i.get_text()[:endchar])
            taxis.set_ticklabels(nticklabels)
            plt.setp(taxis.get_majorticklabels(), rotation=label_rotation)
        if legend is True:
            plt.legend(loc='best')
    elif type == 'histogram':
        tsd.hist(figsize=figsize)
    else:
        raise ValueError("""
*
*   Plot 'type' {0} is not supported.
*
""".format(type))

    if xy_match_line:
        if isinstance(xy_match_line, str):
            xymsty = xy_match_line
        else:
            xymsty = 'g--'
        nxlim = ax.get_xlim()
        nylim = ax.get_ylim()
        maxt = max(nxlim[1], nylim[1])
        mint = min(nxlim[0], nylim[0])
        ax.plot([mint, maxt], [mint, maxt], xymsty, zorder=1)
        ax.set_ylim(nylim)
        ax.set_xlim(nxlim)

    plt.xlabel(xtitle)
    plt.ylabel(ytitle)

    if invert_xaxis is True:
        plt.gca().invert_xaxis()
    if invert_yaxis is True:
        plt.gca().invert_yaxis()

    plt.grid(grid)

    plt.title(title)
    plt.tight_layout()
    if ofilename is None:
        return plt
    plt.savefig(ofilename)
plt.plot([x for x in range(len(truth))], truth, label='truth')
axes = plt.gca()
axes.set_ylim([miny, maxy])
plt.legend()
if name is not None:
    plt.savefig(name)
else:
    plt.show()
plt.close()


def plot_relation_power_output(df, attr):
    df.boxplot(by=attr, column="Power average [kW]", showfliers=False)
    axes = plt.gca()
    plt.show()
    plt.close()
    # df["Wind average [m/s]"][0:100].plot()
    # plt.show()


if __name__ == "__main__":
    df = read_file(sys.argv[1])
    df = arrange_data(df)
    autocorrelation_plot(df["Wind average [m/s]"][0:100])
    plt.show()
    # plot_relation_power_output(df, "hour")
    # print(df["Power average [kW]"].mean())
    # print(df["Wind average [m/s]"].mean())
def prot02(series):
    autocorrelation_plot(series)
    pyplot.show()
plt.subplot(2, 1, 2)  # 2 rows, 1 column, plot 2
plt.plot(dataSet2)
plt.title('dataSet2')
plt.text(2, 0.8, 'dataSet2')

"""
Lag plot for visual identification of autocorrelation
from pandas.tools.plotting import lag_plot
lag_plot(dataSet1)
lag_plot(dataSet2)
"""

# Call the autocorrelation routine and print the result
# Plot the autocorrelation functions
plt.figure(2)
autocorrelation_plot(dataSet1)
plt.show()
plt.figure(3)
autocorrelation_plot(dataSet2)
plt.show()

# Import the ARIMA function
# Fit the ARIMA model; order taken over from the example
"""First, we fit an ARIMA(5,1,0) model. This sets the lag value to 5 for
autoregression, uses a difference order of 1 to make the time series
stationary, and uses a moving average model of 0"""
model = ARIMA(dataSet1, order=(10, 1, 0))
import matplotlib.pyplot as plt
import helper_functions as hf
from pandas.plotting import autocorrelation_plot

# import quandl
# My QUANDL API_KEY:
# quandl.ApiConfig.api_key = '49thVPqqr_9BPMHXsRfS'
# mydata = quandl.get('FSE/EON_X')

df = hf.GetIndiaNifty500HistData()
df.info()
print(df)

fig, ax = plt.subplots(nrows=2)
# x=df.Date,
df.Price.plot(ax=ax[0])
# plt.plot(df.Date, df.Price)
autocorrelation_plot(df.Price, ax=ax[1])
plt.show()
    'gyr_phone_y_freq_0.8_Hz_ws_40', 'acc_phone_y_freq_1.6_Hz_ws_40'
]

possible_feature_sets = [
    basic_features, features_after_chapter_3, features_after_chapter_4,
    features_after_chapter_5, selected_features
]
feature_names = [
    'initial set', 'Chapter 3', 'Chapter 4', 'Chapter 5', 'Selected features'
]

# Let us first study whether the time series is stationary and what the
# autocorrelations are.
dftest = adfuller(dataset['gyr_phone_z'], autolag='AIC')

plt.figure()
autocorrelation_plot(dataset['gyr_phone_z'])
DataViz.save(plt)
plt.show()

# Now let us focus on the learning part.
learner = TemporalRegressionAlgorithms()
eval = RegressionEvaluation()

# We repeat the experiment a number of times to get a bit more robust data,
# as the initialization of e.g. the NN is random.
repeats = 10

# We set a washout time to give the NNs time to stabilize. We do not compute
# the error during the washout time.
washout_time = 10