def cross_corr_mat(df, yi_col, yj_col, lag=0):
    yi_yi = acf(df[yi_col].values, unbiased=False, nlags=len(df) - 2)
    yj_yj = acf(df[yj_col].values, unbiased=False, nlags=len(df) - 2)
    yi_yj = ccf(df[yi_col].values, df[yj_col].values, unbiased=False)
    yj_yi = ccf(df[yj_col].values, df[yi_col].values, unbiased=False)
    ccm = pd.DataFrame({yi_col: [yi_yi[lag], yj_yi[lag]],
                        yj_col: [yi_yj[lag], yj_yj[lag]]},
                       index=[yi_col, yj_col])
    return ccm
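A minimal usage sketch for cross_corr_mat (a sketch only: the column names and data below are invented, pandas/numpy and statsmodels' acf/ccf are assumed to be imported as above, and an older statsmodels that still accepts the unbiased keyword is assumed):

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf, ccf

rng = np.random.default_rng(0)
y1 = rng.standard_normal(200)
y2 = np.r_[np.zeros(3), y1[:-3]] + 0.1 * rng.standard_normal(200)   # y2 trails y1 by 3 steps
demo = pd.DataFrame({"y1": y1, "y2": y2})
print(cross_corr_mat(demo, "y1", "y2", lag=3))   # 2x2 matrix of auto-/cross-correlations at lag 3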
def compare_acf_ccf():
    # This is the data series to analyze
    A = np.array([np.absolute(x) for x in np.arange(-1, 1.1, 0.1)])
    # Autocorrelation using statsmodels' acf function
    plt.plot(acf(A, fft=True), "r-")
    # Autocorrelation using statsmodels' ccf function
    # MUST set unbiased=False to get the same result as the acf function
    plt.plot(ccf(A, A, unbiased=False), "go")
    plt.plot(ccf(A, A), "bx")
    plt.show()
    return
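A quick numerical check of the comment above (a sketch, assuming a recent statsmodels in which unbiased has been renamed adjusted; on older releases use unbiased=False): the CCF of a series with itself, computed without the small-sample adjustment, should reproduce the ACF at the overlapping lags.

import numpy as np
from statsmodels.tsa.stattools import acf, ccf

x = np.abs(np.arange(-1, 1.1, 0.1))
auto = acf(x, fft=True, nlags=len(x) - 1)
cross = ccf(x, x, adjusted=False)              # unbiased=False on older statsmodels
print(np.allclose(auto, cross[:len(auto)]))    # expected: True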
def draw_ccf(ts, ts1):
    ay = np.array(ts)
    print(ay.shape)
    ay1 = np.array(ts1)
    print(ay1.shape)
    testccf = ccf(ay, ay1)
    print(testccf)
def Cross_Correlation_plot(self):
    df = pd.read_csv('dataVset/mortality.csv')
    x = df['mdeaths']
    y = df['fdeaths']

    # Compute Cross Correlations
    ccs = ss.ccf(x, y)[:100]
    nlags = len(ccs)

    # Compute the Significance level
    # ref: https://stats.stackexchange.com/questions/3115/cross-correlation-significance-in-r/3128#3128
    conf_level = 2 / np.sqrt(nlags)

    # Draw Plot
    plt.figure("Cross Correlation plot", figsize=(12, 7), dpi=80)
    plt.hlines(0, xmin=0, xmax=100, color='gray')  # 0 axis
    plt.hlines(conf_level, xmin=0, xmax=100, color='gray')
    plt.hlines(-conf_level, xmin=0, xmax=100, color='gray')
    plt.bar(x=np.arange(len(ccs)), height=ccs, width=.3)

    # Decoration
    # plt.title('$Cross\; Correlation\; Plot:\; mdeaths\; vs\; fdeaths$', fontsize=22)
    plt.title('Cross Correlation Plot : mdeaths vs fdeaths', fontsize=22)
    plt.xlim(0, len(ccs))
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        plt.show()
def find_optimum_lags(df: pd.DataFrame) -> int:
    """Take a dataframe with two columns and find the offset (lag) needed to
    align them, using the cross-correlation function."""
    df_ = df.copy().dropna(how='any')
    optimum_lags = np.argmax(
        stattools.ccf(df_.iloc[:, 0].values, df_.iloc[:, 1].values))
    print(
        f"Optimum offset (lag) between {df_.iloc[:, 0].name} and {df_.iloc[:, 1].name}: {optimum_lags}"
    )
    return optimum_lags
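A small sanity check for find_optimum_lags (a sketch with synthetic data and made-up column names; stattools is assumed to be imported as in the function above): delaying the first column by a known number of steps relative to the second should make the reported lag come out at that offset. The very highest lags of the default CCF estimator are noisy, so a spurious pick is possible on short series.

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
leader = rng.standard_normal(500)
follower = np.r_[np.zeros(7), leader[:-7]]            # trails the leader by 7 steps
demo = pd.DataFrame({"follower": follower, "leader": leader})
print(find_optimum_lags(demo))                        # should report an offset of 7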
def plotCCF(lcTime, lcIntA, lcIntB, **kwargs):
    '''Calculate and plot the cross-correlation between two light curves (LCs).'''
    corr = ccf(lcIntA, lcIntB)
    plt.plot(lcTime, corr, **kwargs)
    plt.xlabel(r"$\tau(s)$", fontsize=14)
    plt.ylabel(r"$\rho(\tau)$", fontsize=14)
    plt.title(r"Cross-correlation $\rho(\tau)$ of two LCs", fontsize=14)
    plt.show()
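A usage sketch for plotCCF with two synthetic light curves (matplotlib, numpy, and statsmodels' ccf are assumed to be imported as above; the time axis must have the same length as the intensity arrays because ccf returns one value per sample):

import numpy as np

t = np.arange(500)                                    # sample index used as the time axis
rng = np.random.default_rng(5)
lc_a = np.sin(2 * np.pi * t / 50) + 0.1 * rng.standard_normal(t.size)
lc_b = np.roll(lc_a, 10)                              # second light curve: delayed (circularly shifted) copy
plotCCF(t, lc_a, lc_b, color='C0')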
def plot(self, viz: visdom.Visdom): def strongest_correlation(coef_vars_lags: dict): values = list(coef_vars_lags.values()) keys = list(coef_vars_lags.keys()) accumulated_per_variable = np.sum(np.abs(values), axis=1) strongest_id = np.argmax(accumulated_per_variable) return keys[strongest_id], values[strongest_id] acf_variables = {} ccf_variable_pairs = {} for name, samples in self.samples.items(): if len(samples) < self.n_lags + 1: continue observations = torch.stack(samples, dim=0) observations.t_() observations = observations.numpy() active_rows_mask = list(map(np.any, np.diff(observations, axis=1))) active_rows = np.where(active_rows_mask)[0] for i, active_row in enumerate(active_rows): acf_lags = acf(observations[active_row], unbiased=False, nlags=self.n_lags, fft=True, missing='raise') acf_variables[f'{name}.{active_row}'] = acf_lags if self.with_cross_correlation: for paired_row in active_rows[i + 1:]: ccf_lags = ccf(observations[active_row], observations[paired_row], unbiased=False) ccf_variable_pairs[(active_row, paired_row)] = ccf_lags if len(acf_variables) > 0: acf_mean = np.mean(list(acf_variables.values()), axis=0) viz.bar(X=acf_mean, win='autocorr', opts=dict(xlabel='Lag', ylabel='ACF', title=f'mean Autocorrelation')) if len(ccf_variable_pairs) > 0: shortest_length = min(map(len, ccf_variable_pairs.values())) for key, values in ccf_variable_pairs.items(): ccf_variable_pairs[key] = values[:shortest_length] ccf_mean = np.mean(list(ccf_variable_pairs.values()), axis=0) viz.bar(X=ccf_mean, win='crosscorr', opts=dict(xlabel='Lag', ylabel='CCF', ytickmin=0., ytickmax=1., title=f'mean Cross-Correlation'))
def get_correlation_table(metric_df):
    metric_cnt = metric_df.shape[1]
    correlation_table = np.zeros((metric_cnt, metric_cnt))
    for i in range(metric_cnt):
        metric_1 = metric_df.iloc[:, i]
        for j in range(metric_cnt):
            if i == j:
                continue
            else:
                metric_2 = metric_df.iloc[:, j]
                cc_ary = ccf(metric_1, metric_2, unbiased=False)
                correlation_table[i, j] = cc_ary[0]
    return correlation_table
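A usage sketch for get_correlation_table (invented metric names and data; numpy, pandas, and an older statsmodels whose ccf still accepts unbiased are assumed). Each off-diagonal cell is the lag-0 cross-correlation, i.e. the ordinary Pearson correlation, between a pair of metric columns; the diagonal is left at zero.

import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
cpu = rng.standard_normal(300)
latency = 0.8 * cpu + 0.2 * rng.standard_normal(300)    # strongly tied to cpu
errors = rng.standard_normal(300)                        # unrelated
metrics = pd.DataFrame({"cpu": cpu, "latency": latency, "errors": errors})
print(get_correlation_table(metrics).round(2))           # large cpu/latency entries, small elsewhere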
def WindowCorrelation2(c, centreIndex, window=15):
    centres = getOtherCentresAverage(c, centreIndex, PRICE_CENTRE)
    corrs = []
    l1 = len(c)
    l2 = len(centres[l1])
    for i in range(0, l2 - window + 1):
        corr = st.ccf(c[centreIndex][PRICE_CENTRE][i:(i + window)],
                      centres[l1][i:(i + window)], unbiased=True)
        corrs.append(corr[0])
    anom = []
    (lower_threshold, upper_threshold) = MADThreshold(corrs)
    idx = pd.date_range('2006-01-01', '2015-06-23')
    for i in range(0, len(corrs)):
        if corrs[i] < lower_threshold or corrs[i] > upper_threshold:
            anom.append(idx[i])
    return anom
def phase_offset(y1, y2, period=24):
    r"""
    Find the lag or offset between time series y1 and y2 using cross-correlation.

    Args:
    :param y1: Time series 1
    :param y2: Time series 2
    :param period: The period of one oscillation, default = 24
    :returns: the cross-correlation between time series 1 and 2, and the index
        of the maximum correlation.
    """
    correlation = ccf(y2, y1)
    index = np.argmax(correlation[0:int(2 * period)])
    return correlation, index
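A quick check of phase_offset with two hourly signals sharing a 24 h period (a sketch; the 5-hour shift and the noise level are made up, and numpy plus statsmodels' ccf are assumed to be in scope as above): the returned index should recover the imposed shift.

import numpy as np

rng = np.random.default_rng(3)
t = np.arange(24 * 10 + 5)                              # ten days of hourly samples, plus padding
base = np.sin(2 * np.pi * t / 24) + 0.3 * rng.standard_normal(t.size)
y1, y2 = base[5:], base[:-5]                            # y2 trails y1 by 5 hours
corr, lag = phase_offset(y1, y2, period=24)
print(lag)                                              # expected: 5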
def find_corr(img_array, t1, t2, t3, t4, x, y, z, x1, y1, z1):
    threshold = shuffle(img_array, x, y, z, t1, t2, x1, y1, z1, 500)
    series1 = img_array[x, y, z, t1:t2]
    series2 = img_array[x1, y1, z1, t3:t4]
    corr = ccf(series1, series2)
    print(corr)
    fig = plt.figure()
    x = np.arange(t2 - t1)
    ax = fig.add_subplot(111)
    ax.set_ylim(-1, 1)
    plt.plot(corr, marker='o', color='r')
    for i, j in zip(x, corr):
        ax.annotate(str("{0:.4f}".format(j)), xy=(i, j))
    plt.xlabel('Time Lag')
    plt.ylabel('Correlation')
    plt.show()
def _accfMatrix(process, maxLag):
    import itertools
    process = np.array(process).T
    comb = list(itertools.product(process, repeat=process.shape[0]))
    size = int(np.sqrt(len(comb)))
    u = np.zeros(shape=(size**2, maxLag))
    if len(comb) == 1:
        u[0] = acf(comb[0][0])[:maxLag]
    else:
        for i in range(len(comb)):
            u[i] = ccf(comb[i][0], comb[i][1])[:maxLag]
    arr = np.array([
        np.reshape(u[:, i], newshape=(size, size)) for i in range(maxLag)
    ])
    # u = np.reshape(u, newshape = ())
    return arr
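A usage sketch for _accfMatrix with a two-variable process (synthetic data; numpy and statsmodels' acf/ccf are assumed to be in scope as above): the result stacks one size-by-size auto-/cross-correlation matrix per lag.

import numpy as np

rng = np.random.default_rng(4)
proc = rng.standard_normal((100, 2))    # 100 observations of a 2-variable process
mats = _accfMatrix(proc, maxLag=5)
print(mats.shape)                       # expected: (5, 2, 2)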
def analyze(st, rs, sim): mlp = MLPRegressor(hidden_layer_sizes=st,activation="logistic", solver="lbfgs", \ verbose=True, random_state=rs) model = mlp.fit(PHI, Y) y_hat = model.predict(PHI) y_hat_val = model.predict(PHI_val) #SIMULATION # start from initial phi, then build step by step each ne element reg_y = np.full(AR_deg, 0) reg_u = np.full(X_deg, 0) reg = np.append(reg_u, reg_y) #simulate the process y_hat_sim = [] for i in range(VAL_LENGHT + ID_LENGHT): y_i = model.predict([reg]) #simulated y_hat_sim.append(y_i) reg_y = np.append(reg_y, y_hat_sim[i])[1:] reg_u = np.append( reg_u, u[i])[1:] #append at beggining, then remove last one( [:-1]) reg = np.append(reg_u, reg_y) #PLOT identification plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat, color='blue') plt.subplot(312) plt.plot(y_id, color='red') plt.subplot(313) plt.plot(y_hat, color='blue') plt.plot(y_id, color='red') plt.savefig("plot_id_{}_{}.png".format(sim, st), transparent=False) #plt.show() #PLOT validation plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat_val, color='blue') plt.subplot(312) plt.plot(y_val, color='red') plt.subplot(313) plt.plot(y_hat_val, color='blue') plt.plot(y_val, color='red') plt.savefig("plot_val_{}_{}.png".format(sim, st), transparent=False) #plt.show() #MODEL VALIDATION - CORRELATION FUNCTIONS from statsmodels.tsa.stattools import acf, ccf epsilon = np.array(y_val - y_hat_val) #Autocorrelation epsilon corr_ee = acf(epsilon) #Cross-correlation u-epsilon corr_ue = ccf(u_val, epsilon, unbiased=False) #Cross-correlation epsilon ( epsilon*u) corr_e_eu = ccf(epsilon, np.multiply(epsilon[1:], u_val[1:]), unbiased=False) #Cross-correlation delta(u^2)-epsilon corr_du2_e = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), epsilon, unbiased=False) #Cross-correlation delta(u^2)-epsilon corr_du2_e2 = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), np.power(epsilon, 2), unbiased=False) #confidence interval -95% conf_interval_sup = 1.96 / np.sqrt(VAL_LENGHT) conf_interval_inf = -1.96 / np.sqrt(VAL_LENGHT) #Diagrams plot plt.figure(figsize=(15, 8)) plt.subplot(231) plt.title(r'$\phi_{\xi\xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_ee) plt.ylim((-1, 1)) plt.subplot(232) plt.title(r'$\phi_{\xi(\xi u)}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_e_eu) plt.ylim((-1, 1)) plt.subplot(234) plt.title(r'$\phi_{u \xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_ue) plt.ylim((-1, 1)) plt.subplot(235) plt.title(r'$\phi_{u^2\xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_du2_e) plt.ylim((-1, 1)) plt.subplot(236) plt.title(r'$\phi_{u^2\xi^2}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_du2_e2) plt.ylim((-1, 1)) plt.savefig("plot_correlation_tests_{}_{}.png".format(sim, st), transparent=False) #plt.show() plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat_sim, color='blue') plt.subplot(312) plt.plot(y, color='red') plt.subplot(313) plt.plot(y_hat_sim, color='blue') plt.plot(y, color='red') plt.savefig("plot_sim_{}_{}.png".format(sim, st), transparent=False)
# Tests

# Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
adfuller(air.Passengers)

# Autocorrelation Function (ACF): correlation between the series and a lagged version of itself
from statsmodels.tsa.stattools import acf
lag_acf = acf(air.Passengers, nlags=4)

# Partial ACF
from statsmodels.tsa.stattools import pacf
lag_pacf = pacf(air.Passengers, nlags=4)

# Cross-Correlation Function (CCF): a measure of similarity between two time series
# as a function of the lag of one relative to the other
from statsmodels.tsa.stattools import ccf
lag_ccf = ccf(air.Passengers, air.Passengers)

# Plotting
plt.subplot(221)
plt.plot(timeseries, color='black', label='original')
plt.plot(rolmean, color='blue', label='Rolling Mean')
plt.plot(rolstd, color='red', label='Rolling Deviation')
plt.legend(loc='best')
plt.title('Original Data, Rolling Mean & Standard Deviation')

plt.subplot(223)
plt.plot(lag_pacf, color='orange', label='partial auto correlation func')
plt.legend(loc='best')
plt.title('Partial Auto Correlation Function')

plt.subplot(224)
plt.plot(lag_acf, color='green', label='auto correlation func')
    return os.path.join(data_path, filename)


def cout(text):
    if not isinstance(text, str):
        text = str(text)
    print(text)
    if fcout is not None:
        fcout.write(text + '\n')  # also output to file
    return

# ---------------------------------------------------------------------------------------------------
xcorr = lambda x, y: irfft(rfft(x) * rfft(y[::-1]))                       # cross-correlation of two numpy arrays
corr = lambda x, y: np.corrcoef(x, y)[0, 1]                               # correlation for two numpy arrays
ccf = lambda x, y: sm_tools.ccf(np.array(x), np.array(y), unbiased=True)  # cross-correlation using statsmodels
returns = lambda x: np.diff(np.log(x))                                    # convert from prices to returns
prices_ = lambda x, price0: np.exp(np.cumsum(x)) * price0                 # convert returns back to prices (price0 is the initial price in the original time series)
prices = lambda x, price0: np.insert(prices_(x, price0), 0, price0, axis=0)  # same as above but includes the initial price in the array

########################################################################################################################
"""
# TEST: Decomposition of time series
x = get_sample_series_streamflow()
residual, seasonal, trend = decompose_time_series(x)
print(trend['1950':'1951'])
firstValueCorr = np.zeros(len(fileList)) firstValueCorr_rev = np.zeros(len(fileList)) secondValueCorr = np.zeros(len(fileList)) secondValueCorr_rev = np.zeros(len(fileList)) xlim = np.linspace(-0.3, +0.5, 9) counter = 0 threshold = 0.613 for j in fileList: data = pd.read_csv('{}/lfp_{}.csv'.format(date,j)) ex_lfp = np.array(data['ex_lfp']) time = np.array(data['time']) ex_lfp = lc.smooth(ex_lfp,5) #in_lfp = lc.smooth(in_lfp) periods_data = lc.time_diffrences(ex_lfp) amplitude_data = lc.amps_detection(ex_lfp) crossCorrelation = ccf(amplitude_data,periods_data,unbiased=False) crossCorrelation_rev = ccf(periods_data,amplitude_data,unbiased=False) shuffling1 = lc.shuff_corr(amplitude_data,periods_data) shuffling2 = lc.shuff_corr(amplitude_data,periods_data) # Extracting First Correlation Term firstValueCorr[counter] = crossCorrelation[0] firstValueCorr_rev[counter] = crossCorrelation_rev[0] secondValueCorr[counter] = crossCorrelation[1] secondValueCorr_rev[counter] = crossCorrelation_rev[1] counter += 1 # Ploting Part plt.subplot(1,2,1) plt.plot(crossCorrelation_rev[:20],'.-',label='PAC {} '.format(round(j - 0.5,2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1),color=np.random.choice(colorPallete))
def test_ccf():
    ccf_x = stattools.ccf(x100[4:], x100[:-4], adjusted=False)[:21]
    assert_array_almost_equal(mlccf.ccf100.ravel()[:21][::-1], ccf_x, 8)
    ccf_x = stattools.ccf(x1000[4:], x1000[:-4], adjusted=False)[:21]
    assert_array_almost_equal(mlccf.ccf1000.ravel()[:21][::-1], ccf_x, 8)
plt.plot(y_val, color='red')
plt.show()

MSE_val = mean_squared_error(y_val, y_hat_val)
print("MSE on validation: ", MSE_val)

# MODEL VALIDATION - CORRELATION FUNCTIONS
from statsmodels.tsa.stattools import acf, ccf

epsilon = np.array(y_val - y_hat_val)
u_val = np.array(u_val)

# Autocorrelation of epsilon
corr_ee = acf(epsilon)
# Cross-correlation u-epsilon
corr_ue = ccf(u_val, epsilon, unbiased=False)
# Cross-correlation epsilon-(epsilon*u)
corr_e_eu = ccf(epsilon, np.multiply(epsilon[1:], u_val[1:]), unbiased=False)
# Cross-correlation delta(u^2)-epsilon
corr_du2_e = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), epsilon, unbiased=False)
# Cross-correlation delta(u^2)-epsilon^2
corr_du2_e2 = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), np.power(epsilon, 2), unbiased=False)

# confidence interval - 95%
def test_ccf():
    ccf_x = tsa.ccf(x100[4:], x100[:-4], unbiased=False)[:21]
    assert_array_almost_equal(mlccf.ccf100.ravel()[:21][::-1], ccf_x, 8)
    ccf_x = tsa.ccf(x1000[4:], x1000[:-4], unbiased=False)[:21]
    assert_array_almost_equal(mlccf.ccf1000.ravel()[:21][::-1], ccf_x, 8)
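The two test variants above differ only in the keyword name: newer statsmodels releases renamed the unbiased argument of acf/ccf to adjusted, and the old name was eventually removed. A small compatibility sketch (ccf_compat is a hypothetical helper) for code that has to run against either version:

import inspect
from statsmodels.tsa.stattools import ccf


def ccf_compat(x, y, adjusted=False):
    """Call ccf with whichever keyword the installed statsmodels accepts."""
    if "adjusted" in inspect.signature(ccf).parameters:
        return ccf(x, y, adjusted=adjusted)
    return ccf(x, y, unbiased=adjusted)   # older statsmodels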
def pairwise_ccf(ts_ref, ts_exogs, slice_start_date=None, slice_end_date=None, ccf_lag_thr=30, k=5, normalized=False, selected_lag=None, corr_th=None): # print(type(ts_exogs)) # if slice_start_date is None: # slice_start_date = ts_ref.index.min().date() # if slice_end_date is None: # slice_end_date = ts_ref.index.max().date() print("Corr analysis period:") print("From", slice_start_date, "To", slice_end_date) ts_ref_sliced = ts_ref[slice_start_date:slice_end_date] ccf_vals = list() cols = ['feature'] for i in range(1, k + 1): cols.append("max_val_" + str(i) + "_lag") cols.append("max_val_" + str(i)) ts_dic = dict() for ts_exog_name in ts_exogs: # print(ts_exogs) ts_exog = ts_exogs[ts_exog_name] # print(type(ts_exog)) # print(type(ts_exogs)) # print(ts_exog) # print(ts_exog.head()) # input("press a key") ts_candidate_sliced = ts_exog[slice_start_date:slice_end_date] if normalized: ts_ref_sliced = (ts_ref_sliced - ts_ref_sliced.values.min()) / \ (ts_ref_sliced.values.max() - ts_ref_sliced.values.min()) ts_candidate_sliced = (ts_candidate_sliced - ts_candidate_sliced.values.min()) / ( ts_candidate_sliced.values.max() - ts_candidate_sliced.values.min()) res_ccf = ccf(ts_ref_sliced.values, ts_candidate_sliced.values, unbiased=False) # print(ts_ref_sliced.shape, ts_candidate_sliced.shape) res_ccf_sub = res_ccf[:ccf_lag_thr] # print("ccf:") # print(np.around(res_ccf_sub, decimals=2)) res_ccf_sub_abs = np.abs(res_ccf_sub) indx = np.argsort(-res_ccf_sub)[:k] val = res_ccf_sub[indx] temp_list = [ts_exog.name] for i in range(k): temp_list.append(indx[i]) temp_list.append(val[i]) ccf_vals.append(temp_list) ts_dic[ts_exog.name] = ts_exog df_cc = pd.DataFrame(ccf_vals, columns=cols) # create external signals for all of the keywords lagged_ts = pd.DataFrame(data=None) if corr_th is None: for key in ts_dic: row = df_cc[df_cc['feature'] == key] if selected_lag is None: lag = row['max_val_1_lag'].values[0] else: lag = selected_lag[key] ts = ts_dic[key] ts_candidate_sliced = ts[slice_start_date:] ts_exog = ts_candidate_sliced.shift(periods=lag) ts_exog.fillna(inplace=True, value=0) lagged_ts[key] = ts_exog else: for key in ts_dic: row = df_cc[df_cc['feature'] == key] if selected_lag is None: lag = row['max_val_1_lag'].values[0] corr_val = row['max_val_1'].values[0] if corr_val < corr_th: continue else: lag = selected_lag[key] ts = ts_dic[key] ts_candidate_sliced = ts[slice_start_date:] ts_exog = ts_candidate_sliced.shift(periods=lag) ts_exog.fillna(inplace=True, value=0) lagged_ts[key] = ts_exog return df_cc, lagged_ts
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

"""The cross-correlation plot shows the lags of two time series relative to each other."""
import statsmodels.tsa.stattools as stattools

# Import Data
df = pd.read_csv(
    'https://github.com/selva86/datasets/raw/master/mortality.csv')
x = df['mdeaths']
y = df['fdeaths']

# Compute Cross Correlations
ccs = stattools.ccf(x, y)[:100]
nlags = len(ccs)

# Compute the Significance level
# ref: https://stats.stackexchange.com/questions/3115/cross-correlation-significance-in-r/3128#3128
conf_level = 2 / np.sqrt(nlags)

# Draw Plot
plt.figure(figsize=(12, 7), dpi=80)
plt.hlines(0, xmin=0, xmax=100, color='gray')  # 0 axis
plt.hlines(conf_level, xmin=0, xmax=100, color='gray')
plt.hlines(-conf_level, xmin=0, xmax=100, color='gray')
plt.bar(x=np.arange(len(ccs)), height=ccs, width=.3)
def shuff_corr(data1, data2):
    # Note: np.random.shuffle permutes the arrays in place, so the caller's data is modified.
    np.random.shuffle(data1)
    np.random.shuffle(data2)
    shuff_corr = ccf(data1, data2, unbiased=False)
    return shuff_corr
def crosscorrelation(time_series1, time_series2, unbiased):
    # Pass the caller's `unbiased` flag through instead of hard-coding False.
    ccf_values = stats.ccf(time_series1, time_series2, unbiased=unbiased)
    return ccf_values
)
way_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID7'
)
Niriella_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID8'
)
denawaka_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID5'
)
kuru_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID3'
)

ccf_output11 = smt.ccf(kalu_ganga['water level'], kuru_ganga['water level'], unbiased=False)
ccf_output12 = smt.ccf(kuru_ganga['water level'], kalu_ganga['water level'], unbiased=False)

fig1, (plot11, plot12) = plt.subplots(2, 1, figsize=(10, 4.8))
plot11.set_title('Cross-correlation of kalu ganga vs kuru ganga')
plot11.plot(ccf_output11)
plot12.plot(ccf_output12)
peaks11, _ = find_peaks(ccf_output11, height=0)
peaks12, _ = find_peaks(ccf_output12, height=0)
plot11.plot(peaks11, ccf_output11[peaks11], "x")
plot12.plot(peaks12, ccf_output12[peaks12], "x")
fig1.savefig('plot1.png')

ccf_output21 = smt.ccf(kalu_ganga['water level'],
def ccf(ts1, ts2, unbiased=True):
    ## Cross-correlation function for 1-d series
    values_ccf = stattools.ccf(ts1, ts2, unbiased)
    return values_ccf
ax2 = plt.subplot(gs1[8:, 0])
ax2.plot(t, conv)
ax2.set_xlim([ilag - 5, ilag + 5])
ax2.set_ylabel('Convolution kernel')
ax2.set_xlabel('lag (days)')

# compute ccf function
print('scipy ccf')
import scipy.signal as ss
ccf = ss.correlate(x, echo2)
print('done')

import statsmodels.tsa.stattools as st
print('statsm ccf')
ccf = st.ccf(x[ilag + 2:], echo2[ilag + 2:])
print('done')

nccf = np.shape(ccf)[0]
tccf = np.arange(nccf) - nccf // 2

ax4 = plt.subplot(gs1[8:, 2])
ax4.plot(tccf, ccf)
yl = list(ax4.get_ylim())
ax4.plot([0, 0], yl, color='k', ls='--', label='ccf function')
ax4.set_ylim(yl)
ax4.set_ylabel('CCF(lag)')
ax4.set_xlabel('lag (days)')
ax4.set_xlim([-ilag - 5, ilag + 5])

# compute looped distribution of mean lags
lagrange = [-50, 50]
# Autocorrelation function
# Measure of correlation between the time series and a lagged version of itself
from statsmodels.tsa.stattools import acf
lag_acf = acf(timeseries, nlags=NL)

# Partial autocorrelation function
# Measure of correlation between the time series and a lagged version of itself,
# after eliminating the variation explained by the intervening lags
from statsmodels.tsa.stattools import pacf
lag_pacf = pacf(timeseries, nlags=NL)

# Cross-correlation function
# Measure of similarity between two time series as a function of the lag between them
from statsmodels.tsa.stattools import ccf
lag_ccf = ccf(timeseries1, timeseries2)

# =============================================================================
# Example - from notes, week 09
# =============================================================================
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
import matplotlib.pylab as plt
import pandas as pd

# ----------------- Pre-process with pandas ----------------------------------#
data_frame = pd.read_csv('C:/Users/Eoin/OneDrive/Data Science/UCD Data Analytics/Data Programming with Python/ReferenceFiles/Airpassenger.csv', header=0)
data_frame['Month'] = pd.to_datetime(data_frame['Month'])
def correlogram(residuals, path='', fig_name='correlogram.pdf', title=None, labels=None, model_labels=None, palette=None, n_lags=50, figsize=(8, 6), size_labels=16, size_ticks=14, size_legend=16, bottom=None, top=None, left=None, right=None, savefig=False): """ Correlogram of residuals. :type residuals: list :param residuals: list of lists (one list of residuals per event type) or list of lists of lists when multiple models are compared (one list of lists per model). :type path: string :param path: where the figure is saved. :type fig_name: string :param fig_name: name of the file. :type title: string :param title: suptitle. :type labels: list of strings :param labels: labels of the event types. :type model_labels: list of strings :param model_labels: names of the different considered models. :type palette: list of colours :param palette: color palette, one color per model. :type n_lags: int :param n_lags: number of lags to plot. :type figsize: (int, int) :param figsize: tuple (width, height). :type size_labels: int :param size_labels: fontsize of labels. :type size_ticks: int :param size_ticks: fontsize of tick labels. :type legend_size: int :param legend_size: fontsize of the legend. :type bottom: float :param bottom: between 0 and 1, adjusts the bottom margin, see matplotlib subplots_adjust. :type top: float :param top: between 0 and 1, adjusts the top margin, see matplotlib subplots_adjust. :type left: float :param left: between 0 and 1, adjusts the left margin, see matplotlib subplots_adjust. :type right: float :param right: between 0 and 1, adjusts the right margin, see matplotlib subplots_adjust. :type savefig: boolean :param savefig: set to True to save the figure. :rtype: Figure, array of Axes :return: the figure and array of figures (see matplotlib). 
""" # find number of models given and number of event types (dim) n_models = 1 dim = len(residuals) if type(residuals[0][0]) in [list, np.ndarray ]: # case when there is more than one model n_models = len(residuals) dim = len(residuals[0]) # set empty model labels if no labels provided if model_labels is None: model_labels = [None] * n_models v_size = dim h_size = dim if palette is None: palette = seaborn.color_palette('husl', n_models) f, fig_array = plt.subplots(v_size, h_size, figsize=figsize, sharex='col', sharey='row') if title is not None: f.suptitle(title) for i in range(v_size): for j in range(h_size): axes = None if v_size == 1 and h_size == 1: axes = fig_array elif v_size == 1: axes = fig_array[j] elif h_size == 1: axes = fig_array[i] else: axes = fig_array[i, j] axes.tick_params(axis='both', which='major', labelsize=size_ticks) # font size for tick labels if n_models == 1: max_length = min(len(residuals[i]), len(residuals[j])) ccf = stattools.ccf(np.array(residuals[i][0:max_length]), np.array(residuals[j][0:max_length]), unbiased=True) axes.plot(ccf[0:n_lags + 1], color=palette[0]) axes.set_xlim(xmin=0, xmax=n_lags) else: for m in range(n_models): max_length = min(len(residuals[m][i]), len(residuals[m][j])) ccf = stattools.ccf( np.array(residuals[m][i][0:max_length]), np.array(residuals[m][j][0:max_length]), unbiased=True) axes.plot(ccf[0:n_lags + 1], color=palette[m], label=model_labels[m]) axes.set_xlim(xmin=0, xmax=n_lags) if i + j == 0: # only add legend in the first subplot legend = axes.legend(frameon=1, fontsize=size_legend) legend.get_frame().set_facecolor('white') if labels is not None: axes.set_title(labels[i] + r'$\rightarrow$' + labels[j], fontsize=size_labels) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) if bottom != None: plt.subplots_adjust(left=left, right=right, bottom=bottom, top=top) f.text(0.5, 0.025, 'Lag', ha='center', fontsize=size_labels) f.text(0.015, 0.5, 'Correlation', va='center', rotation='vertical', fontsize=size_labels) if savefig: entire_path = os.path.join(path, fig_name) plt.savefig(entire_path) return f, fig_array
cols = ['CO(GT)', 'NO2(GT)', 'RH']
data_df = aq_df.loc[aq_df.index > '2004-10-01', cols]

fig, ax = plt.subplots(3, figsize=(15, 5), sharex=True)
data_df.plot(ax=ax, subplots=True)
plt.xlabel('')
plt.tight_layout()
plt.show()

**Quick inspection before we proceed with modeling...**

To find out whether the multivariate approach is better than treating the signals separately as univariate time series, we examine the relationship between the variables using the CCF. The sample below shows the CCF for the last 100 data points of the air quality data for CO, NO2 and RH.

*CO and NO2*

sample_df = data_df.iloc[-100:]
ccf_y1_y2 = ccf(sample_df['CO(GT)'], sample_df['NO2(GT)'], unbiased=False)
ccf_y2_y1 = ccf(sample_df['NO2(GT)'], sample_df['CO(GT)'], unbiased=False)

fig, ax = plt.subplots(2, figsize=(15, 4), sharex=True, sharey=True)
d = 1
ax[0].stem(np.arange(len(sample_df))[::d], ccf_y1_y2[::d], linefmt='C1-', markerfmt='C1o')
ax[1].stem(np.arange(len(sample_df))[::d], ccf_y2_y1[::d], linefmt='C1-', markerfmt='C1o')
ax[-1].set_ylim(-1, 1)
ax[0].set_xlim(0, 100)
ax[-1].set_xlabel('lag $h$', fontsize=14)
ax[0].set_ylabel(r'$\rho_{CO,NO2} (h)$', fontsize=14)
ax[1].set_ylabel(r'$\rho_{NO2,CO} (h)$', fontsize=14)
plt.tight_layout()
plt.show()

*CO and RH*
def abs_evaluate_predictions(predictions, annotations, results_dict): if any(isinstance(el, (list, tuple, Iterable)) for el in annotations): coint_pred_to_ann = [] coint_ann_to_pred = [] cross_correlation_list = [] kendall_list = [] spearman_list = [] pearson_list = [] kendall_pvalue_list = [] spearman_pvalue_list = [] pearson_pvalue_list = [] l1_distance_list = [] l2_distance_list = [] for pred, ann in zip(predictions, annotations): try: if len(pred) != len(ann): #print("List not equal", results_dict, pred, ann) continue else: pass #print(results_dict, pred, ann) cross_correlation = ccf(numpy.asarray(pred), numpy.asarray(ann)) cross_correlation_list.append(numpy.mean(cross_correlation)) pearson, pvalue = pearsonr(pred, ann) pearson_list.append(pearson) pearson_pvalue_list.append(pvalue) kendall, pvalue = kendalltau(pred, ann, nan_policy="omit") kendall_list.append(kendall) kendall_pvalue_list.append(pvalue) spearman, pvalue = spearmanr(pred, ann) spearman_list.append(spearman) spearman_pvalue_list.append(pvalue) l2_distance_list.append( distance.euclidean(pred, ann) / len(ann)) l1_distance_list.append( distance.cityblock(pred, ann) / len(ann)) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(ann), numpy.asarray(pred), autolag=None) coint_ann_to_pred.append(coint_t) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(pred), numpy.asarray(ann), autolag=None) coint_pred_to_ann.append(coint_t) except Exception as ex: print(ex) def ci(r, n, alpha=0.05): r_z = numpy.arctanh(r) se = 1 / numpy.sqrt(n - 3) z = scipy.stats.norm.ppf(1 - alpha / 2) lo_z, hi_z = r_z - z * se, r_z + z * se lo, hi = numpy.tanh((lo_z, hi_z)) return lo, hi try: results_dict[f"pearson"] = sum(pearson_list) / float( len(pearson_list)) results_dict[f"kendall"] = sum(kendall_list) / float( len(kendall_list)) results_dict[f"spearman"] = sum(spearman_list) / float( len(spearman_list)) results_dict[f"pearson_agg_stat"], results_dict[ f"pearson_pvalue"] = combine_pvalues( [x for x in pearson_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"kendall_agg_stat"], results_dict[ f"kendall_pvalue"] = combine_pvalues( [x for x in kendall_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"spearman_agg_stat"], results_dict[ f"spearman_pvalue"] = combine_pvalues( [x for x in spearman_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"pearson_low_95"], results_dict[ f"pearson_high_95"] = ci(results_dict[f"pearson"], len(pearson_list)) results_dict[f"kendall_low_95"], results_dict[ f"kendall_high_95"] = ci(results_dict[f"kendall"], len(kendall_list)) results_dict[f"spearman_low_95"], results_dict[ f"spearman_high_95"] = ci(results_dict[f"spearman"], len(spearman_list)) results_dict[f"pearson_low_99"], results_dict[ f"pearson_high_99"] = ci(results_dict[f"pearson"], len(pearson_list), alpha=0.01) results_dict[f"kendall_low_99"], results_dict[ f"kendall_high_99"] = ci(results_dict[f"kendall"], len(kendall_list), alpha=0.01) results_dict[f"spearman_low_99"], results_dict[ f"spearman_high_99"] = ci(results_dict[f"spearman"], len(spearman_list), alpha=0.01) results_dict[f"l2_distance"] = sum(l2_distance_list) / float( len(l2_distance_list)) results_dict[f"l1_distance"] = sum(l1_distance_list) / float( len(l1_distance_list)) results_dict[f"first_second_cointegration"] = sum( coint_ann_to_pred) / float(len(coint_ann_to_pred)) results_dict[f"second_first_cointegration"] = sum( coint_pred_to_ann) / 
float(len(coint_pred_to_ann)) results_dict[f"cross_correlation"] = sum( cross_correlation_list) / float(len(cross_correlation_list)) except Exception as ex: print(ex) #results_dict[f"alpha"] = sum(alpha_list) / float(len(annotations)) else: if len(predictions) != len(annotations): #print("List not equal", results_dict, predictions, annotations) return else: pass #print(results_dict, predictions, annotations) try: cross_correlation = ccf(numpy.asarray(predictions), numpy.asarray(annotations)) results_dict[f"cross_correlation"] = numpy.mean(cross_correlation) results_dict[f"pearson"], results_dict[ f"pearson_pvalue"] = pearsonr(predictions, annotations) results_dict[f"kendall"], results_dict[ f"kendall_pvalue"] = kendalltau(predictions, annotations, nan_policy="omit") results_dict[f"spearman"], results_dict[ f"spearman_pvalue"] = spearmanr(predictions, annotations) results_dict["l2_distance"] = distance.euclidean( predictions, annotations) / len(annotations) results_dict["l1_distance"] = distance.cityblock( predictions, annotations) / len(annotations) print(results_dict, predictions, annotations) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(annotations), numpy.asarray(predictions), autolag=None) results_dict[f"ann_to_pred_cointegration"] = coint_t results_dict[ f"ann_to_pred_cointegration_p_value"] = coint_t_p_value results_dict[f"ann_to_pred_cointegration_critical_1"], results_dict[ f"ann_to_pred_cointegration_critical_5"], \ results_dict[f"ann_to_pred_cointegration_critical_10"] = coint_t_critical_values coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(predictions), numpy.asarray(annotations), autolag=None) results_dict[f"pred_to_ann_cointegration"] = coint_t results_dict[ f"pred_to_ann_cointegration_p_value"] = coint_t_p_value results_dict[f"pred_to_ann_cointegration_critical_1"], results_dict[ f"pred_to_ann_cointegration_critical_5"], results_dict[ f"pred_to_ann_cointegration_critical_10"] = coint_t_critical_values except Exception as ex: print(ex)
def crossCorrelation(date, fileList, ex_tauSyn): #checking whether a directory exist or not: colorPallete = [u'#86232F', u'#50151C', u'#7C7C7C', u'#7F222E', u'#7C7C7C'] plt.figure(figsize=(20, 10)) sns.set_style("dark") firstValueCorr = np.zeros(len(fileList)) firstValueCorr_rev = np.zeros(len(fileList)) secondValueCorr = np.zeros(len(fileList)) secondValueCorr_rev = np.zeros(len(fileList)) xlim = np.linspace((fileList[0] - ex_tauSyn), (fileList[-1] - ex_tauSyn), len(fileList)) counter = 0 threshold = 0.613 for j in fileList: data = pd.read_csv('{}/lfp_{}.csv'.format(date, j)) ex_lfp = np.array(data['ex_lfp']) time = np.array(data['time']) ex_lfp = lc.smooth(ex_lfp, 5) #in_lfp = lc.smooth(in_lfp) periods_data = lc.time_diffrences(ex_lfp) amplitude_data = lc.amps_detection(ex_lfp) crossCorrelation = ccf(amplitude_data, periods_data, unbiased=False) crossCorrelation_rev = ccf(periods_data, amplitude_data, unbiased=False) shuffling1 = lc.shuff_corr(amplitude_data, periods_data) shuffling2 = lc.shuff_corr(amplitude_data, periods_data) # Extracting First Correlation Term firstValueCorr[counter] = crossCorrelation[0] firstValueCorr_rev[counter] = crossCorrelation_rev[0] secondValueCorr[counter] = crossCorrelation[1] secondValueCorr_rev[counter] = crossCorrelation_rev[1] counter += 1 # Ploting Part plt.subplot(1, 2, 1) plt.plot(crossCorrelation_rev[:20], '.-', label='PAC {} '.format(round(j - 0.5, 2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1), color=np.random.choice(colorPallete)) plt.fill_between(range(0, 20), y1=shuffling1[:20], alpha=0.1, color=u'#86232F') plt.fill_between(range(0, 20), y1=shuffling2[:20], alpha=0.05, color=u'#86232F') plt.text(0, crossCorrelation[0], '{}'.format(round(crossCorrelation[0], 3)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0)) plt.ylim(-0.2, 1) plt.title('Period-Amplitude Correlation') plt.xlabel('Steps') plt.ylabel('Correlation') plt.legend(loc='best') plt.grid(alpha=1, color='w', linestyle='--') if (j == fileList[-1]): ax = inset_axes(plt.gca(), width='45%', height='30%', loc='upper center') ax.grid(alpha=1, color='w', linestyle='--') #ax.set_ylim(0.2,0.7) #ax.set_xlim(-0.3,+0.5) ax.plot(xlim, firstValueCorr_rev, '.-', color=u'#86232F', label='1st') #ax.plot(xlim,secondValueCorr_rev,'.-',color=u'#7C7C7C',label='2nd') ax.legend(loc=2) # Plotting First Correlation Term plt.subplot(1, 2, 2) plt.plot(crossCorrelation[:20], '.-', label='APC {} '.format(round(j - 0.5, 2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1), color=np.random.choice(colorPallete)) plt.fill_between(range(0, 20), y1=shuffling1[:20], alpha=0.05, color=u'#86232F') plt.fill_between(range(0, 20), y1=shuffling2[:20], alpha=0.1, color=u'#86232F') plt.text(0, crossCorrelation[0], '{}'.format(round(crossCorrelation[0], 3)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0)) plt.ylim(-0.2, 1) plt.title('Amplitude-Period Correlation') plt.xlabel('Steps') plt.ylabel('Correlation') plt.legend(loc='best') plt.grid(alpha=1, color='w', linestyle='--') if (j == fileList[-1]): ax = inset_axes(plt.gca(), width='45%', height='30%', loc='upper center') ax.grid(alpha=1, color='w', linestyle='--') #ax.set_xlim(-0.3,+0.5) ax.plot(xlim, firstValueCorr, '.-', color=u'#86232F', label='1st') #ax.plot(xlim,secondValueCorr,'.-',color=u'#7C7C7C',label='2nd') ax.legend(loc=2) corrsAddress = date + '/correlations' if (os.path.isdir(corrsAddress)): pass else: os.makedirs(corrsAddress) plt.savefig('{}/crossCorr_shuffs.pdf'.format(corrsAddress)) plt.close()
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import ccf

dis_mat = np.array(
    pd.read_csv('data/distance.csv', encoding='utf-8', names=list(range(228))))

# For each station, collect the other stations that are less than 5 km away
cand_list = [[k] for k in range(228)]
for i in range(228):
    for j in range(i + 1, 228):
        if dis_mat[i, j] < 5000:
            cand_list[i].append(j)
            cand_list[j].append(i)

ts = []
for k in range(34):
    df = pd.read_csv('data/train/%i.csv' % k, encoding='utf-8', names=list(range(228)))
    df = np.array(df).transpose()
    ts.append(df)
ts = np.array(ts)
ts_mean = np.mean(ts, axis=0)

for i in range(1):
    for j in cand_list[i]:
        print((i, j))
        for k in range(34):
            print('for %ith file:' % k, ccf(ts[k, i, :], ts[k, j, :])[:12])
plt.fill_between(test.index, confidence_intervals[0], confidence_intervals[1], color='lightgrey')
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Model AR')
plt.xlabel('Data')
plt.ylabel('Zużycie [MW]')
plt.legend(['test', 'AR({})'.format(lag)])
plt.grid()
f.savefig(PATH_TO_PLOTS + '/timeSeriesPredTest.pdf', bbox_inches='tight')
plt.show()

# plot cross-correlation
f = plt.figure()
plt.plot(ccf(test, y_pred, unbiased=True))
plt.title('Korelacja wzajemna szeregów test i pred')
plt.xlabel('Opóźnienie')
plt.ylabel('Korelacja')
plt.grid()
f.savefig(PATH_TO_PLOTS + '/crosscorrelation.pdf', bbox_inches='tight')
plt.show()

# plot residuals
res = y_pred - test
f = plt.figure()
plt.plot(res)
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Residua dla modelu AR({})'.format(lag))
plt.xlabel('Data')
plt.ylabel('Residua')