def cross_corr_mat(df, yi_col, yj_col, lag=0):
    yi_yi = acf(df[yi_col].values, unbiased=False, nlags=len(df) - 2)
    yj_yj = acf(df[yj_col].values, unbiased=False, nlags=len(df) - 2)
    yi_yj = ccf(df[yi_col].values, df[yj_col].values, unbiased=False)
    yj_yi = ccf(df[yj_col].values, df[yi_col].values, unbiased=False)
    ccm = pd.DataFrame({yi_col: [yi_yi[lag], yj_yi[lag]],
                        yj_col: [yi_yj[lag], yj_yj[lag]]},
                       index=[yi_col, yj_col])
    return ccm
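A minimal usage sketch for cross_corr_mat (a sketch only: the column names and data below are invented, pandas/numpy and statsmodels' acf/ccf are assumed to be imported as above, and an older statsmodels that still accepts the unbiased keyword is assumed):

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf, ccf

rng = np.random.default_rng(0)
y1 = rng.standard_normal(200)
y2 = np.r_[np.zeros(3), y1[:-3]] + 0.1 * rng.standard_normal(200)   # y2 trails y1 by 3 steps
demo = pd.DataFrame({"y1": y1, "y2": y2})
print(cross_corr_mat(demo, "y1", "y2", lag=3))   # 2x2 matrix of auto-/cross-correlations at lag 3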
def compare_acf_ccf():
    # This is the data series to analyze
    A = np.array([np.absolute(x) for x in np.arange(-1, 1.1, 0.1)])
    # Autocorrelation using statsmodels' acf function
    plt.plot(acf(A, fft=True), "r-")
    # Autocorrelation using statsmodels' ccf function
    # MUST set unbiased=False to get the same result as the acf function
    plt.plot(ccf(A, A, unbiased=False), "go")
    plt.plot(ccf(A, A), "bx")
    plt.show()
    return
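A quick numerical check of the comment above (a sketch, assuming a recent statsmodels in which unbiased has been renamed adjusted; on older releases use unbiased=False): the CCF of a series with itself, computed without the small-sample adjustment, should reproduce the ACF at the overlapping lags.

import numpy as np
from statsmodels.tsa.stattools import acf, ccf

x = np.abs(np.arange(-1, 1.1, 0.1))
auto = acf(x, fft=True, nlags=len(x) - 1)
cross = ccf(x, x, adjusted=False)              # unbiased=False on older statsmodels
print(np.allclose(auto, cross[:len(auto)]))    # expected: True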
def draw_ccf(ts, ts1):
    ay = np.array(ts)
    print(ay.shape)
    ay1 = np.array(ts1)
    print(ay1.shape)
    testccf = ccf(ay, ay1)
    print(testccf)
def Cross_Correlation_plot(self):
    df = pd.read_csv('dataVset/mortality.csv')
    x = df['mdeaths']
    y = df['fdeaths']

    # Compute Cross Correlations
    ccs = ss.ccf(x, y)[:100]
    nlags = len(ccs)

    # Compute the Significance level
    # ref: https://stats.stackexchange.com/questions/3115/cross-correlation-significance-in-r/3128#3128
    conf_level = 2 / np.sqrt(nlags)

    # Draw Plot
    plt.figure("Cross Correlation plot", figsize=(12, 7), dpi=80)
    plt.hlines(0, xmin=0, xmax=100, color='gray')  # 0 axis
    plt.hlines(conf_level, xmin=0, xmax=100, color='gray')
    plt.hlines(-conf_level, xmin=0, xmax=100, color='gray')
    plt.bar(x=np.arange(len(ccs)), height=ccs, width=.3)

    # Decoration
    # plt.title('$Cross\; Correlation\; Plot:\; mdeaths\; vs\; fdeaths$', fontsize=22)
    plt.title('Cross Correlation Plot : mdeaths vs fdeaths', fontsize=22)
    plt.xlim(0, len(ccs))
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        plt.show()
def find_optimum_lags(df: pd.DataFrame) -> int:
    """Take a dataframe with two columns and find the offset (lag) needed to
    align them, using the cross-correlation function."""
    df_ = df.copy().dropna(how='any')
    optimum_lags = np.argmax(
        stattools.ccf(df_.iloc[:, 0].values, df_.iloc[:, 1].values))
    print(
        f"Optimum offset (lag) between {df_.iloc[:, 0].name} and {df_.iloc[:, 1].name}: {optimum_lags}"
    )
    return optimum_lags
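A small sanity check for find_optimum_lags (a sketch with synthetic data and made-up column names; stattools is assumed to be imported as in the function above): delaying the first column by a known number of steps relative to the second should make the reported lag come out at that offset. The very highest lags of the default CCF estimator are noisy, so a spurious pick is possible on short series.

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
leader = rng.standard_normal(500)
follower = np.r_[np.zeros(7), leader[:-7]]            # trails the leader by 7 steps
demo = pd.DataFrame({"follower": follower, "leader": leader})
print(find_optimum_lags(demo))                        # should report an offset of 7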
def plotCCF(lcTime, lcIntA, lcIntB, **kwargs):
    '''Calculate and plot the cross-correlation between two light curves (LCs).'''
    corr = ccf(lcIntA, lcIntB)
    plt.plot(lcTime, corr, **kwargs)
    plt.xlabel(r"$\tau(s)$", fontsize=14)
    plt.ylabel(r"$\rho(\tau)$", fontsize=14)
    plt.title(r"Cross-correlation $\rho(\tau)$ of two LCs", fontsize=14)
    plt.show()
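A usage sketch for plotCCF with two synthetic light curves (matplotlib, numpy, and statsmodels' ccf are assumed to be imported as above; the time axis must have the same length as the intensity arrays because ccf returns one value per sample):

import numpy as np

t = np.arange(500)                                    # sample index used as the time axis
rng = np.random.default_rng(5)
lc_a = np.sin(2 * np.pi * t / 50) + 0.1 * rng.standard_normal(t.size)
lc_b = np.roll(lc_a, 10)                              # second light curve: delayed (circularly shifted) copy
plotCCF(t, lc_a, lc_b, color='C0')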
def plot(self, viz: visdom.Visdom): def strongest_correlation(coef_vars_lags: dict): values = list(coef_vars_lags.values()) keys = list(coef_vars_lags.keys()) accumulated_per_variable = np.sum(np.abs(values), axis=1) strongest_id = np.argmax(accumulated_per_variable) return keys[strongest_id], values[strongest_id] acf_variables = {} ccf_variable_pairs = {} for name, samples in self.samples.items(): if len(samples) < self.n_lags + 1: continue observations = torch.stack(samples, dim=0) observations.t_() observations = observations.numpy() active_rows_mask = list(map(np.any, np.diff(observations, axis=1))) active_rows = np.where(active_rows_mask)[0] for i, active_row in enumerate(active_rows): acf_lags = acf(observations[active_row], unbiased=False, nlags=self.n_lags, fft=True, missing='raise') acf_variables[f'{name}.{active_row}'] = acf_lags if self.with_cross_correlation: for paired_row in active_rows[i + 1:]: ccf_lags = ccf(observations[active_row], observations[paired_row], unbiased=False) ccf_variable_pairs[(active_row, paired_row)] = ccf_lags if len(acf_variables) > 0: acf_mean = np.mean(list(acf_variables.values()), axis=0) viz.bar(X=acf_mean, win='autocorr', opts=dict(xlabel='Lag', ylabel='ACF', title=f'mean Autocorrelation')) if len(ccf_variable_pairs) > 0: shortest_length = min(map(len, ccf_variable_pairs.values())) for key, values in ccf_variable_pairs.items(): ccf_variable_pairs[key] = values[:shortest_length] ccf_mean = np.mean(list(ccf_variable_pairs.values()), axis=0) viz.bar(X=ccf_mean, win='crosscorr', opts=dict(xlabel='Lag', ylabel='CCF', ytickmin=0., ytickmax=1., title=f'mean Cross-Correlation'))
def get_correlation_table(metric_df):
    metric_cnt = metric_df.shape[1]
    correlation_table = np.zeros((metric_cnt, metric_cnt))
    for i in range(metric_cnt):
        metric_1 = metric_df.iloc[:, i]
        for j in range(metric_cnt):
            if i == j:
                continue
            else:
                metric_2 = metric_df.iloc[:, j]
                cc_ary = ccf(metric_1, metric_2, unbiased=False)
                correlation_table[i, j] = cc_ary[0]
    return correlation_table
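A usage sketch for get_correlation_table (invented metric names and data; numpy, pandas, and an older statsmodels whose ccf still accepts unbiased are assumed). Each off-diagonal cell is the lag-0 cross-correlation, i.e. the ordinary Pearson correlation, between a pair of metric columns; the diagonal is left at zero.

import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
cpu = rng.standard_normal(300)
latency = 0.8 * cpu + 0.2 * rng.standard_normal(300)    # strongly tied to cpu
errors = rng.standard_normal(300)                        # unrelated
metrics = pd.DataFrame({"cpu": cpu, "latency": latency, "errors": errors})
print(get_correlation_table(metrics).round(2))           # large cpu/latency entries, small elsewhere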
def WindowCorrelation2(c, centreIndex, window=15):
    centres = getOtherCentresAverage(c, centreIndex, PRICE_CENTRE)
    corrs = []
    l1 = len(c)
    l2 = len(centres[l1])
    for i in range(0, l2 - window + 1):
        corr = st.ccf(c[centreIndex][PRICE_CENTRE][i:(i + window)],
                      centres[l1][i:(i + window)], unbiased=True)
        corrs.append(corr[0])
    anom = []
    (lower_threshold, upper_threshold) = MADThreshold(corrs)
    idx = pd.date_range('2006-01-01', '2015-06-23')
    for i in range(0, len(corrs)):
        if corrs[i] < lower_threshold or corrs[i] > upper_threshold:
            anom.append(idx[i])
    return anom
def phase_offset(y1, y2, period=24):
    r"""
    Find the lag or offset between time series y1 and y2 using cross-correlation.

    Args:
    :param y1: Time series 1
    :param y2: Time series 2
    :param period: The period of one oscillation, default = 24
    :returns: the cross-correlation between time series 1 and 2, and the index
        of the maximum correlation.
    """
    correlation = ccf(y2, y1)
    index = np.argmax(correlation[0:int(2 * period)])
    return correlation, index
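A quick check of phase_offset with two hourly signals sharing a 24 h period (a sketch; the 5-hour shift and the noise level are made up, and numpy plus statsmodels' ccf are assumed to be in scope as above): the returned index should recover the imposed shift.

import numpy as np

rng = np.random.default_rng(3)
t = np.arange(24 * 10 + 5)                              # ten days of hourly samples, plus padding
base = np.sin(2 * np.pi * t / 24) + 0.3 * rng.standard_normal(t.size)
y1, y2 = base[5:], base[:-5]                            # y2 trails y1 by 5 hours
corr, lag = phase_offset(y1, y2, period=24)
print(lag)                                              # expected: 5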
def find_corr(img_array, t1, t2, t3, t4, x, y, z, x1, y1, z1):
    threshold = shuffle(img_array, x, y, z, t1, t2, x1, y1, z1, 500)
    series1 = img_array[x, y, z, t1:t2]
    series2 = img_array[x1, y1, z1, t3:t4]
    corr = ccf(series1, series2)
    print(corr)
    fig = plt.figure()
    x = np.arange(t2 - t1)
    ax = fig.add_subplot(111)
    ax.set_ylim(-1, 1)
    plt.plot(corr, marker='o', color='r')
    for i, j in zip(x, corr):
        ax.annotate(str("{0:.4f}".format(j)), xy=(i, j))
    plt.xlabel('Time Lag')
    plt.ylabel('Correlation')
    plt.show()
def _accfMatrix(process, maxLag):
    import itertools
    process = np.array(process).T
    comb = list(itertools.product(process, repeat=process.shape[0]))
    size = int(np.sqrt(len(comb)))
    u = np.zeros(shape=(size**2, maxLag))
    if len(comb) == 1:
        u[0] = acf(comb[0][0])[:maxLag]
    else:
        for i in range(len(comb)):
            u[i] = ccf(comb[i][0], comb[i][1])[:maxLag]
    arr = np.array([
        np.reshape(u[:, i], newshape=(size, size)) for i in range(maxLag)
    ])
    # u = np.reshape(u, newshape = ())
    return arr
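A usage sketch for _accfMatrix with a two-variable process (synthetic data; numpy and statsmodels' acf/ccf are assumed to be in scope as above): the result stacks one size-by-size auto-/cross-correlation matrix per lag.

import numpy as np

rng = np.random.default_rng(4)
proc = rng.standard_normal((100, 2))    # 100 observations of a 2-variable process
mats = _accfMatrix(proc, maxLag=5)
print(mats.shape)                       # expected: (5, 2, 2)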
def analyze(st, rs, sim): mlp = MLPRegressor(hidden_layer_sizes=st,activation="logistic", solver="lbfgs", \ verbose=True, random_state=rs) model = mlp.fit(PHI, Y) y_hat = model.predict(PHI) y_hat_val = model.predict(PHI_val) #SIMULATION # start from initial phi, then build step by step each ne element reg_y = np.full(AR_deg, 0) reg_u = np.full(X_deg, 0) reg = np.append(reg_u, reg_y) #simulate the process y_hat_sim = [] for i in range(VAL_LENGHT + ID_LENGHT): y_i = model.predict([reg]) #simulated y_hat_sim.append(y_i) reg_y = np.append(reg_y, y_hat_sim[i])[1:] reg_u = np.append( reg_u, u[i])[1:] #append at beggining, then remove last one( [:-1]) reg = np.append(reg_u, reg_y) #PLOT identification plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat, color='blue') plt.subplot(312) plt.plot(y_id, color='red') plt.subplot(313) plt.plot(y_hat, color='blue') plt.plot(y_id, color='red') plt.savefig("plot_id_{}_{}.png".format(sim, st), transparent=False) #plt.show() #PLOT validation plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat_val, color='blue') plt.subplot(312) plt.plot(y_val, color='red') plt.subplot(313) plt.plot(y_hat_val, color='blue') plt.plot(y_val, color='red') plt.savefig("plot_val_{}_{}.png".format(sim, st), transparent=False) #plt.show() #MODEL VALIDATION - CORRELATION FUNCTIONS from statsmodels.tsa.stattools import acf, ccf epsilon = np.array(y_val - y_hat_val) #Autocorrelation epsilon corr_ee = acf(epsilon) #Cross-correlation u-epsilon corr_ue = ccf(u_val, epsilon, unbiased=False) #Cross-correlation epsilon ( epsilon*u) corr_e_eu = ccf(epsilon, np.multiply(epsilon[1:], u_val[1:]), unbiased=False) #Cross-correlation delta(u^2)-epsilon corr_du2_e = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), epsilon, unbiased=False) #Cross-correlation delta(u^2)-epsilon corr_du2_e2 = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), np.power(epsilon, 2), unbiased=False) #confidence interval -95% conf_interval_sup = 1.96 / np.sqrt(VAL_LENGHT) conf_interval_inf = -1.96 / np.sqrt(VAL_LENGHT) #Diagrams plot plt.figure(figsize=(15, 8)) plt.subplot(231) plt.title(r'$\phi_{\xi\xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_ee) plt.ylim((-1, 1)) plt.subplot(232) plt.title(r'$\phi_{\xi(\xi u)}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_e_eu) plt.ylim((-1, 1)) plt.subplot(234) plt.title(r'$\phi_{u \xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_ue) plt.ylim((-1, 1)) plt.subplot(235) plt.title(r'$\phi_{u^2\xi}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_du2_e) plt.ylim((-1, 1)) plt.subplot(236) plt.title(r'$\phi_{u^2\xi^2}(\tau)$', fontsize=30) plt.axhline(y=conf_interval_sup, color="red") plt.axhline(y=conf_interval_inf, color="red") plt.plot(corr_du2_e2) plt.ylim((-1, 1)) plt.savefig("plot_correlation_tests_{}_{}.png".format(sim, st), transparent=False) #plt.show() plt.figure(figsize=(15, 8)) plt.subplot(311) plt.plot(y_hat_sim, color='blue') plt.subplot(312) plt.plot(y, color='red') plt.subplot(313) plt.plot(y_hat_sim, color='blue') plt.plot(y, color='red') plt.savefig("plot_sim_{}_{}.png".format(sim, st), transparent=False)
# Tests

# Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
adfuller(air.Passengers)

# Autocorrelation Function (ACF): correlation between the series and a lagged version of itself
from statsmodels.tsa.stattools import acf
lag_acf = acf(air.Passengers, nlags=4)

# Partial ACF
from statsmodels.tsa.stattools import pacf
lag_pacf = pacf(air.Passengers, nlags=4)

# Cross-Correlation Function (CCF): a measure of similarity between two time series
# as a function of the lag of one relative to the other
from statsmodels.tsa.stattools import ccf
lag_ccf = ccf(air.Passengers, air.Passengers)

# Plotting
plt.subplot(221)
plt.plot(timeseries, color='black', label='original')
plt.plot(rolmean, color='blue', label='Rolling Mean')
plt.plot(rolstd, color='red', label='Rolling Deviation')
plt.legend(loc='best')
plt.title('Original Data, Rolling Mean & Standard Deviation')

plt.subplot(223)
plt.plot(lag_pacf, color='orange', label='partial auto correlation func')
plt.legend(loc='best')
plt.title('Partial Auto Correlation Function')

plt.subplot(224)
plt.plot(lag_acf, color='green', label='auto correlation func')
    return os.path.join(data_path, filename)


def cout(text):
    if not isinstance(text, str):
        text = str(text)
    print(text)
    if fcout is not None:
        fcout.write(text + '\n')  # also output to file
    return

# ---------------------------------------------------------------------------------------------------
xcorr = lambda x, y: irfft(rfft(x) * rfft(y[::-1]))                       # cross-correlation of two numpy arrays
corr = lambda x, y: np.corrcoef(x, y)[0, 1]                               # correlation for two numpy arrays
ccf = lambda x, y: sm_tools.ccf(np.array(x), np.array(y), unbiased=True)  # cross-correlation using statsmodels
returns = lambda x: np.diff(np.log(x))                                    # convert from prices to returns
prices_ = lambda x, price0: np.exp(np.cumsum(x)) * price0                 # convert returns back to prices (price0 is the initial price in the original time series)
prices = lambda x, price0: np.insert(prices_(x, price0), 0, price0, axis=0)  # same as above but includes the initial price in the array

########################################################################################################################
"""
# TEST: Decomposition of time series
x = get_sample_series_streamflow()
residual, seasonal, trend = decompose_time_series(x)
print(trend['1950':'1951'])
firstValueCorr = np.zeros(len(fileList)) firstValueCorr_rev = np.zeros(len(fileList)) secondValueCorr = np.zeros(len(fileList)) secondValueCorr_rev = np.zeros(len(fileList)) xlim = np.linspace(-0.3, +0.5, 9) counter = 0 threshold = 0.613 for j in fileList: data = pd.read_csv('{}/lfp_{}.csv'.format(date,j)) ex_lfp = np.array(data['ex_lfp']) time = np.array(data['time']) ex_lfp = lc.smooth(ex_lfp,5) #in_lfp = lc.smooth(in_lfp) periods_data = lc.time_diffrences(ex_lfp) amplitude_data = lc.amps_detection(ex_lfp) crossCorrelation = ccf(amplitude_data,periods_data,unbiased=False) crossCorrelation_rev = ccf(periods_data,amplitude_data,unbiased=False) shuffling1 = lc.shuff_corr(amplitude_data,periods_data) shuffling2 = lc.shuff_corr(amplitude_data,periods_data) # Extracting First Correlation Term firstValueCorr[counter] = crossCorrelation[0] firstValueCorr_rev[counter] = crossCorrelation_rev[0] secondValueCorr[counter] = crossCorrelation[1] secondValueCorr_rev[counter] = crossCorrelation_rev[1] counter += 1 # Ploting Part plt.subplot(1,2,1) plt.plot(crossCorrelation_rev[:20],'.-',label='PAC {} '.format(round(j - 0.5,2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1),color=np.random.choice(colorPallete))
def test_ccf():
    ccf_x = stattools.ccf(x100[4:], x100[:-4], adjusted=False)[:21]
    assert_array_almost_equal(mlccf.ccf100.ravel()[:21][::-1], ccf_x, 8)
    ccf_x = stattools.ccf(x1000[4:], x1000[:-4], adjusted=False)[:21]
    assert_array_almost_equal(mlccf.ccf1000.ravel()[:21][::-1], ccf_x, 8)
plt.plot(y_val, color='red')
plt.show()

MSE_val = mean_squared_error(y_val, y_hat_val)
print("MSE on validation: ", MSE_val)

# MODEL VALIDATION - CORRELATION FUNCTIONS
from statsmodels.tsa.stattools import acf, ccf

epsilon = np.array(y_val - y_hat_val)
u_val = np.array(u_val)

# Autocorrelation of epsilon
corr_ee = acf(epsilon)
# Cross-correlation u-epsilon
corr_ue = ccf(u_val, epsilon, unbiased=False)
# Cross-correlation epsilon-(epsilon*u)
corr_e_eu = ccf(epsilon, np.multiply(epsilon[1:], u_val[1:]), unbiased=False)
# Cross-correlation delta(u^2)-epsilon
corr_du2_e = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), epsilon, unbiased=False)
# Cross-correlation delta(u^2)-epsilon^2
corr_du2_e2 = ccf(np.power(u_val, 2) - np.mean(np.power(u_val, 2)), np.power(epsilon, 2), unbiased=False)

# confidence interval - 95%
def test_ccf():
    ccf_x = tsa.ccf(x100[4:], x100[:-4], unbiased=False)[:21]
    assert_array_almost_equal(mlccf.ccf100.ravel()[:21][::-1], ccf_x, 8)
    ccf_x = tsa.ccf(x1000[4:], x1000[:-4], unbiased=False)[:21]
    assert_array_almost_equal(mlccf.ccf1000.ravel()[:21][::-1], ccf_x, 8)
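The two test variants above differ only in the keyword name: newer statsmodels releases renamed the unbiased argument of acf/ccf to adjusted, and the old name was eventually removed. A small compatibility sketch (ccf_compat is a hypothetical helper) for code that has to run against either version:

import inspect
from statsmodels.tsa.stattools import ccf


def ccf_compat(x, y, adjusted=False):
    """Call ccf with whichever keyword the installed statsmodels accepts."""
    if "adjusted" in inspect.signature(ccf).parameters:
        return ccf(x, y, adjusted=adjusted)
    return ccf(x, y, unbiased=adjusted)   # older statsmodels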
def pairwise_ccf(ts_ref, ts_exogs, slice_start_date=None, slice_end_date=None, ccf_lag_thr=30, k=5, normalized=False, selected_lag=None, corr_th=None): # print(type(ts_exogs)) # if slice_start_date is None: # slice_start_date = ts_ref.index.min().date() # if slice_end_date is None: # slice_end_date = ts_ref.index.max().date() print("Corr analysis period:") print("From", slice_start_date, "To", slice_end_date) ts_ref_sliced = ts_ref[slice_start_date:slice_end_date] ccf_vals = list() cols = ['feature'] for i in range(1, k + 1): cols.append("max_val_" + str(i) + "_lag") cols.append("max_val_" + str(i)) ts_dic = dict() for ts_exog_name in ts_exogs: # print(ts_exogs) ts_exog = ts_exogs[ts_exog_name] # print(type(ts_exog)) # print(type(ts_exogs)) # print(ts_exog) # print(ts_exog.head()) # input("press a key") ts_candidate_sliced = ts_exog[slice_start_date:slice_end_date] if normalized: ts_ref_sliced = (ts_ref_sliced - ts_ref_sliced.values.min()) / \ (ts_ref_sliced.values.max() - ts_ref_sliced.values.min()) ts_candidate_sliced = (ts_candidate_sliced - ts_candidate_sliced.values.min()) / ( ts_candidate_sliced.values.max() - ts_candidate_sliced.values.min()) res_ccf = ccf(ts_ref_sliced.values, ts_candidate_sliced.values, unbiased=False) # print(ts_ref_sliced.shape, ts_candidate_sliced.shape) res_ccf_sub = res_ccf[:ccf_lag_thr] # print("ccf:") # print(np.around(res_ccf_sub, decimals=2)) res_ccf_sub_abs = np.abs(res_ccf_sub) indx = np.argsort(-res_ccf_sub)[:k] val = res_ccf_sub[indx] temp_list = [ts_exog.name] for i in range(k): temp_list.append(indx[i]) temp_list.append(val[i]) ccf_vals.append(temp_list) ts_dic[ts_exog.name] = ts_exog df_cc = pd.DataFrame(ccf_vals, columns=cols) # create external signals for all of the keywords lagged_ts = pd.DataFrame(data=None) if corr_th is None: for key in ts_dic: row = df_cc[df_cc['feature'] == key] if selected_lag is None: lag = row['max_val_1_lag'].values[0] else: lag = selected_lag[key] ts = ts_dic[key] ts_candidate_sliced = ts[slice_start_date:] ts_exog = ts_candidate_sliced.shift(periods=lag) ts_exog.fillna(inplace=True, value=0) lagged_ts[key] = ts_exog else: for key in ts_dic: row = df_cc[df_cc['feature'] == key] if selected_lag is None: lag = row['max_val_1_lag'].values[0] corr_val = row['max_val_1'].values[0] if corr_val < corr_th: continue else: lag = selected_lag[key] ts = ts_dic[key] ts_candidate_sliced = ts[slice_start_date:] ts_exog = ts_candidate_sliced.shift(periods=lag) ts_exog.fillna(inplace=True, value=0) lagged_ts[key] = ts_exog return df_cc, lagged_ts
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

"""The cross-correlation plot shows the lags of two time series relative to each other."""
import statsmodels.tsa.stattools as stattools

# Import Data
df = pd.read_csv(
    'https://github.com/selva86/datasets/raw/master/mortality.csv')
x = df['mdeaths']
y = df['fdeaths']

# Compute Cross Correlations
ccs = stattools.ccf(x, y)[:100]
nlags = len(ccs)

# Compute the Significance level
# ref: https://stats.stackexchange.com/questions/3115/cross-correlation-significance-in-r/3128#3128
conf_level = 2 / np.sqrt(nlags)

# Draw Plot
plt.figure(figsize=(12, 7), dpi=80)
plt.hlines(0, xmin=0, xmax=100, color='gray')  # 0 axis
plt.hlines(conf_level, xmin=0, xmax=100, color='gray')
plt.hlines(-conf_level, xmin=0, xmax=100, color='gray')
plt.bar(x=np.arange(len(ccs)), height=ccs, width=.3)
def shuff_corr(data1, data2):
    # Note: np.random.shuffle permutes the arrays in place, so the caller's data is modified.
    np.random.shuffle(data1)
    np.random.shuffle(data2)
    shuff_corr = ccf(data1, data2, unbiased=False)
    return shuff_corr
def crosscorrelation(time_series1, time_series2, unbiased):
    # Pass the caller's `unbiased` flag through instead of hard-coding False.
    ccf_values = stats.ccf(time_series1, time_series2, unbiased=unbiased)
    return ccf_values
)
way_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID7'
)
Niriella_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID8'
)
denawaka_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID5'
)
kuru_ganga = Water_level(
    r'https://rivernet.lk/_kaluganga-overview/server/api/latest-24h?device=ID3'
)

ccf_output11 = smt.ccf(kalu_ganga['water level'], kuru_ganga['water level'], unbiased=False)
ccf_output12 = smt.ccf(kuru_ganga['water level'], kalu_ganga['water level'], unbiased=False)

fig1, (plot11, plot12) = plt.subplots(2, 1, figsize=(10, 4.8))
plot11.set_title('Cross-correlation of kalu ganga vs kuru ganga')
plot11.plot(ccf_output11)
plot12.plot(ccf_output12)
peaks11, _ = find_peaks(ccf_output11, height=0)
peaks12, _ = find_peaks(ccf_output12, height=0)
plot11.plot(peaks11, ccf_output11[peaks11], "x")
plot12.plot(peaks12, ccf_output12[peaks12], "x")
fig1.savefig('plot1.png')

ccf_output21 = smt.ccf(kalu_ganga['water level'],
def ccf(ts1, ts2, unbiased=True):
    ## Cross-correlation function for 1-d series
    values_ccf = stattools.ccf(ts1, ts2, unbiased)
    return values_ccf
ax2 = plt.subplot(gs1[8:, 0])
ax2.plot(t, conv)
ax2.set_xlim([ilag - 5, ilag + 5])
ax2.set_ylabel('Convolution kernel')
ax2.set_xlabel('lag (days)')

# compute ccf function
print('scipy ccf')
import scipy.signal as ss
ccf = ss.correlate(x, echo2)
print('done')

import statsmodels.tsa.stattools as st
print('statsm ccf')
ccf = st.ccf(x[ilag + 2:], echo2[ilag + 2:])
print('done')

nccf = np.shape(ccf)[0]
tccf = np.arange(nccf) - nccf // 2

ax4 = plt.subplot(gs1[8:, 2])
ax4.plot(tccf, ccf)
yl = list(ax4.get_ylim())
ax4.plot([0, 0], yl, color='k', ls='--', label='ccf function')
ax4.set_ylim(yl)
ax4.set_ylabel('CCF(lag)')
ax4.set_xlabel('lag (days)')
ax4.set_xlim([-ilag - 5, ilag + 5])

# compute looped distribution of mean lags
lagrange = [-50, 50]
# Autocorrelation function
# Measure of correlation between the time series and a lagged version of itself
from statsmodels.tsa.stattools import acf
lag_acf = acf(timeseries, nlags=NL)

# Partial autocorrelation function
# Measure of correlation between the time series and a lagged version of itself,
# after eliminating the variation explained by the intervening lags
from statsmodels.tsa.stattools import pacf
lag_pacf = pacf(timeseries, nlags=NL)

# Cross-correlation function
# Measure of similarity between two time series as a function of the lag between them
from statsmodels.tsa.stattools import ccf
lag_ccf = ccf(timeseries1, timeseries2)

# =============================================================================
# Example - from notes, week 09
# =============================================================================
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
import matplotlib.pylab as plt
import pandas as pd

# ----------------- Pre-process with pandas ----------------------------------#
data_frame = pd.read_csv('C:/Users/Eoin/OneDrive/Data Science/UCD Data Analytics/Data Programming with Python/ReferenceFiles/Airpassenger.csv', header=0)
data_frame['Month'] = pd.to_datetime(data_frame['Month'])
def correlogram(residuals, path='', fig_name='correlogram.pdf', title=None, labels=None, model_labels=None, palette=None, n_lags=50, figsize=(8, 6), size_labels=16, size_ticks=14, size_legend=16, bottom=None, top=None, left=None, right=None, savefig=False): """ Correlogram of residuals. :type residuals: list :param residuals: list of lists (one list of residuals per event type) or list of lists of lists when multiple models are compared (one list of lists per model). :type path: string :param path: where the figure is saved. :type fig_name: string :param fig_name: name of the file. :type title: string :param title: suptitle. :type labels: list of strings :param labels: labels of the event types. :type model_labels: list of strings :param model_labels: names of the different considered models. :type palette: list of colours :param palette: color palette, one color per model. :type n_lags: int :param n_lags: number of lags to plot. :type figsize: (int, int) :param figsize: tuple (width, height). :type size_labels: int :param size_labels: fontsize of labels. :type size_ticks: int :param size_ticks: fontsize of tick labels. :type legend_size: int :param legend_size: fontsize of the legend. :type bottom: float :param bottom: between 0 and 1, adjusts the bottom margin, see matplotlib subplots_adjust. :type top: float :param top: between 0 and 1, adjusts the top margin, see matplotlib subplots_adjust. :type left: float :param left: between 0 and 1, adjusts the left margin, see matplotlib subplots_adjust. :type right: float :param right: between 0 and 1, adjusts the right margin, see matplotlib subplots_adjust. :type savefig: boolean :param savefig: set to True to save the figure. :rtype: Figure, array of Axes :return: the figure and array of figures (see matplotlib). 
""" # find number of models given and number of event types (dim) n_models = 1 dim = len(residuals) if type(residuals[0][0]) in [list, np.ndarray ]: # case when there is more than one model n_models = len(residuals) dim = len(residuals[0]) # set empty model labels if no labels provided if model_labels is None: model_labels = [None] * n_models v_size = dim h_size = dim if palette is None: palette = seaborn.color_palette('husl', n_models) f, fig_array = plt.subplots(v_size, h_size, figsize=figsize, sharex='col', sharey='row') if title is not None: f.suptitle(title) for i in range(v_size): for j in range(h_size): axes = None if v_size == 1 and h_size == 1: axes = fig_array elif v_size == 1: axes = fig_array[j] elif h_size == 1: axes = fig_array[i] else: axes = fig_array[i, j] axes.tick_params(axis='both', which='major', labelsize=size_ticks) # font size for tick labels if n_models == 1: max_length = min(len(residuals[i]), len(residuals[j])) ccf = stattools.ccf(np.array(residuals[i][0:max_length]), np.array(residuals[j][0:max_length]), unbiased=True) axes.plot(ccf[0:n_lags + 1], color=palette[0]) axes.set_xlim(xmin=0, xmax=n_lags) else: for m in range(n_models): max_length = min(len(residuals[m][i]), len(residuals[m][j])) ccf = stattools.ccf( np.array(residuals[m][i][0:max_length]), np.array(residuals[m][j][0:max_length]), unbiased=True) axes.plot(ccf[0:n_lags + 1], color=palette[m], label=model_labels[m]) axes.set_xlim(xmin=0, xmax=n_lags) if i + j == 0: # only add legend in the first subplot legend = axes.legend(frameon=1, fontsize=size_legend) legend.get_frame().set_facecolor('white') if labels is not None: axes.set_title(labels[i] + r'$\rightarrow$' + labels[j], fontsize=size_labels) plt.tight_layout(rect=[0, 0.03, 1, 0.95]) if bottom != None: plt.subplots_adjust(left=left, right=right, bottom=bottom, top=top) f.text(0.5, 0.025, 'Lag', ha='center', fontsize=size_labels) f.text(0.015, 0.5, 'Correlation', va='center', rotation='vertical', fontsize=size_labels) if savefig: entire_path = os.path.join(path, fig_name) plt.savefig(entire_path) return f, fig_array
cols = ['CO(GT)', 'NO2(GT)', 'RH']
data_df = aq_df.loc[aq_df.index > '2004-10-01', cols]

fig, ax = plt.subplots(3, figsize=(15, 5), sharex=True)
data_df.plot(ax=ax, subplots=True)
plt.xlabel('')
plt.tight_layout()
plt.show()

**Quick inspection before we proceed with modeling...**

To find out whether the multivariate approach is better than treating the signals separately as univariate time series, we examine the relationship between the variables using the CCF. The sample below shows the CCF for the last 100 data points of the air quality data for CO, NO2 and RH.

*CO and NO2*

sample_df = data_df.iloc[-100:]
ccf_y1_y2 = ccf(sample_df['CO(GT)'], sample_df['NO2(GT)'], unbiased=False)
ccf_y2_y1 = ccf(sample_df['NO2(GT)'], sample_df['CO(GT)'], unbiased=False)

fig, ax = plt.subplots(2, figsize=(15, 4), sharex=True, sharey=True)
d = 1
ax[0].stem(np.arange(len(sample_df))[::d], ccf_y1_y2[::d], linefmt='C1-', markerfmt='C1o')
ax[1].stem(np.arange(len(sample_df))[::d], ccf_y2_y1[::d], linefmt='C1-', markerfmt='C1o')
ax[-1].set_ylim(-1, 1)
ax[0].set_xlim(0, 100)
ax[-1].set_xlabel('lag $h$', fontsize=14)
ax[0].set_ylabel(r'$\rho_{CO,NO2} (h)$', fontsize=14)
ax[1].set_ylabel(r'$\rho_{NO2,CO} (h)$', fontsize=14)
plt.tight_layout()
plt.show()

*CO and RH*
def abs_evaluate_predictions(predictions, annotations, results_dict): if any(isinstance(el, (list, tuple, Iterable)) for el in annotations): coint_pred_to_ann = [] coint_ann_to_pred = [] cross_correlation_list = [] kendall_list = [] spearman_list = [] pearson_list = [] kendall_pvalue_list = [] spearman_pvalue_list = [] pearson_pvalue_list = [] l1_distance_list = [] l2_distance_list = [] for pred, ann in zip(predictions, annotations): try: if len(pred) != len(ann): #print("List not equal", results_dict, pred, ann) continue else: pass #print(results_dict, pred, ann) cross_correlation = ccf(numpy.asarray(pred), numpy.asarray(ann)) cross_correlation_list.append(numpy.mean(cross_correlation)) pearson, pvalue = pearsonr(pred, ann) pearson_list.append(pearson) pearson_pvalue_list.append(pvalue) kendall, pvalue = kendalltau(pred, ann, nan_policy="omit") kendall_list.append(kendall) kendall_pvalue_list.append(pvalue) spearman, pvalue = spearmanr(pred, ann) spearman_list.append(spearman) spearman_pvalue_list.append(pvalue) l2_distance_list.append( distance.euclidean(pred, ann) / len(ann)) l1_distance_list.append( distance.cityblock(pred, ann) / len(ann)) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(ann), numpy.asarray(pred), autolag=None) coint_ann_to_pred.append(coint_t) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(pred), numpy.asarray(ann), autolag=None) coint_pred_to_ann.append(coint_t) except Exception as ex: print(ex) def ci(r, n, alpha=0.05): r_z = numpy.arctanh(r) se = 1 / numpy.sqrt(n - 3) z = scipy.stats.norm.ppf(1 - alpha / 2) lo_z, hi_z = r_z - z * se, r_z + z * se lo, hi = numpy.tanh((lo_z, hi_z)) return lo, hi try: results_dict[f"pearson"] = sum(pearson_list) / float( len(pearson_list)) results_dict[f"kendall"] = sum(kendall_list) / float( len(kendall_list)) results_dict[f"spearman"] = sum(spearman_list) / float( len(spearman_list)) results_dict[f"pearson_agg_stat"], results_dict[ f"pearson_pvalue"] = combine_pvalues( [x for x in pearson_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"kendall_agg_stat"], results_dict[ f"kendall_pvalue"] = combine_pvalues( [x for x in kendall_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"spearman_agg_stat"], results_dict[ f"spearman_pvalue"] = combine_pvalues( [x for x in spearman_pvalue_list if x > 0.0 and x < 1.0], method="mudholkar_george") results_dict[f"pearson_low_95"], results_dict[ f"pearson_high_95"] = ci(results_dict[f"pearson"], len(pearson_list)) results_dict[f"kendall_low_95"], results_dict[ f"kendall_high_95"] = ci(results_dict[f"kendall"], len(kendall_list)) results_dict[f"spearman_low_95"], results_dict[ f"spearman_high_95"] = ci(results_dict[f"spearman"], len(spearman_list)) results_dict[f"pearson_low_99"], results_dict[ f"pearson_high_99"] = ci(results_dict[f"pearson"], len(pearson_list), alpha=0.01) results_dict[f"kendall_low_99"], results_dict[ f"kendall_high_99"] = ci(results_dict[f"kendall"], len(kendall_list), alpha=0.01) results_dict[f"spearman_low_99"], results_dict[ f"spearman_high_99"] = ci(results_dict[f"spearman"], len(spearman_list), alpha=0.01) results_dict[f"l2_distance"] = sum(l2_distance_list) / float( len(l2_distance_list)) results_dict[f"l1_distance"] = sum(l1_distance_list) / float( len(l1_distance_list)) results_dict[f"first_second_cointegration"] = sum( coint_ann_to_pred) / float(len(coint_ann_to_pred)) results_dict[f"second_first_cointegration"] = sum( coint_pred_to_ann) / 
float(len(coint_pred_to_ann)) results_dict[f"cross_correlation"] = sum( cross_correlation_list) / float(len(cross_correlation_list)) except Exception as ex: print(ex) #results_dict[f"alpha"] = sum(alpha_list) / float(len(annotations)) else: if len(predictions) != len(annotations): #print("List not equal", results_dict, predictions, annotations) return else: pass #print(results_dict, predictions, annotations) try: cross_correlation = ccf(numpy.asarray(predictions), numpy.asarray(annotations)) results_dict[f"cross_correlation"] = numpy.mean(cross_correlation) results_dict[f"pearson"], results_dict[ f"pearson_pvalue"] = pearsonr(predictions, annotations) results_dict[f"kendall"], results_dict[ f"kendall_pvalue"] = kendalltau(predictions, annotations, nan_policy="omit") results_dict[f"spearman"], results_dict[ f"spearman_pvalue"] = spearmanr(predictions, annotations) results_dict["l2_distance"] = distance.euclidean( predictions, annotations) / len(annotations) results_dict["l1_distance"] = distance.cityblock( predictions, annotations) / len(annotations) print(results_dict, predictions, annotations) coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(annotations), numpy.asarray(predictions), autolag=None) results_dict[f"ann_to_pred_cointegration"] = coint_t results_dict[ f"ann_to_pred_cointegration_p_value"] = coint_t_p_value results_dict[f"ann_to_pred_cointegration_critical_1"], results_dict[ f"ann_to_pred_cointegration_critical_5"], \ results_dict[f"ann_to_pred_cointegration_critical_10"] = coint_t_critical_values coint_t, coint_t_p_value, coint_t_critical_values = coint( numpy.asarray(predictions), numpy.asarray(annotations), autolag=None) results_dict[f"pred_to_ann_cointegration"] = coint_t results_dict[ f"pred_to_ann_cointegration_p_value"] = coint_t_p_value results_dict[f"pred_to_ann_cointegration_critical_1"], results_dict[ f"pred_to_ann_cointegration_critical_5"], results_dict[ f"pred_to_ann_cointegration_critical_10"] = coint_t_critical_values except Exception as ex: print(ex)
def crossCorrelation(date, fileList, ex_tauSyn): #checking whether a directory exist or not: colorPallete = [u'#86232F', u'#50151C', u'#7C7C7C', u'#7F222E', u'#7C7C7C'] plt.figure(figsize=(20, 10)) sns.set_style("dark") firstValueCorr = np.zeros(len(fileList)) firstValueCorr_rev = np.zeros(len(fileList)) secondValueCorr = np.zeros(len(fileList)) secondValueCorr_rev = np.zeros(len(fileList)) xlim = np.linspace((fileList[0] - ex_tauSyn), (fileList[-1] - ex_tauSyn), len(fileList)) counter = 0 threshold = 0.613 for j in fileList: data = pd.read_csv('{}/lfp_{}.csv'.format(date, j)) ex_lfp = np.array(data['ex_lfp']) time = np.array(data['time']) ex_lfp = lc.smooth(ex_lfp, 5) #in_lfp = lc.smooth(in_lfp) periods_data = lc.time_diffrences(ex_lfp) amplitude_data = lc.amps_detection(ex_lfp) crossCorrelation = ccf(amplitude_data, periods_data, unbiased=False) crossCorrelation_rev = ccf(periods_data, amplitude_data, unbiased=False) shuffling1 = lc.shuff_corr(amplitude_data, periods_data) shuffling2 = lc.shuff_corr(amplitude_data, periods_data) # Extracting First Correlation Term firstValueCorr[counter] = crossCorrelation[0] firstValueCorr_rev[counter] = crossCorrelation_rev[0] secondValueCorr[counter] = crossCorrelation[1] secondValueCorr_rev[counter] = crossCorrelation_rev[1] counter += 1 # Ploting Part plt.subplot(1, 2, 1) plt.plot(crossCorrelation_rev[:20], '.-', label='PAC {} '.format(round(j - 0.5, 2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1), color=np.random.choice(colorPallete)) plt.fill_between(range(0, 20), y1=shuffling1[:20], alpha=0.1, color=u'#86232F') plt.fill_between(range(0, 20), y1=shuffling2[:20], alpha=0.05, color=u'#86232F') plt.text(0, crossCorrelation[0], '{}'.format(round(crossCorrelation[0], 3)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0)) plt.ylim(-0.2, 1) plt.title('Period-Amplitude Correlation') plt.xlabel('Steps') plt.ylabel('Correlation') plt.legend(loc='best') plt.grid(alpha=1, color='w', linestyle='--') if (j == fileList[-1]): ax = inset_axes(plt.gca(), width='45%', height='30%', loc='upper center') ax.grid(alpha=1, color='w', linestyle='--') #ax.set_ylim(0.2,0.7) #ax.set_xlim(-0.3,+0.5) ax.plot(xlim, firstValueCorr_rev, '.-', color=u'#86232F', label='1st') #ax.plot(xlim,secondValueCorr_rev,'.-',color=u'#7C7C7C',label='2nd') ax.legend(loc=2) # Plotting First Correlation Term plt.subplot(1, 2, 2) plt.plot(crossCorrelation[:20], '.-', label='APC {} '.format(round(j - 0.5, 2)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0.1), color=np.random.choice(colorPallete)) plt.fill_between(range(0, 20), y1=shuffling1[:20], alpha=0.05, color=u'#86232F') plt.fill_between(range(0, 20), y1=shuffling2[:20], alpha=0.1, color=u'#86232F') plt.text(0, crossCorrelation[0], '{}'.format(round(crossCorrelation[0], 3)), alpha=(1 if (crossCorrelation[0] >= threshold) else 0)) plt.ylim(-0.2, 1) plt.title('Amplitude-Period Correlation') plt.xlabel('Steps') plt.ylabel('Correlation') plt.legend(loc='best') plt.grid(alpha=1, color='w', linestyle='--') if (j == fileList[-1]): ax = inset_axes(plt.gca(), width='45%', height='30%', loc='upper center') ax.grid(alpha=1, color='w', linestyle='--') #ax.set_xlim(-0.3,+0.5) ax.plot(xlim, firstValueCorr, '.-', color=u'#86232F', label='1st') #ax.plot(xlim,secondValueCorr,'.-',color=u'#7C7C7C',label='2nd') ax.legend(loc=2) corrsAddress = date + '/correlations' if (os.path.isdir(corrsAddress)): pass else: os.makedirs(corrsAddress) plt.savefig('{}/crossCorr_shuffs.pdf'.format(corrsAddress)) plt.close()
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import ccf

dis_mat = np.array(
    pd.read_csv('data/distance.csv', encoding='utf-8', names=list(range(228))))

# For each station, collect the other stations that are less than 5 km away
cand_list = [[k] for k in range(228)]
for i in range(228):
    for j in range(i + 1, 228):
        if dis_mat[i, j] < 5000:
            cand_list[i].append(j)
            cand_list[j].append(i)

ts = []
for k in range(34):
    df = pd.read_csv('data/train/%i.csv' % k, encoding='utf-8', names=list(range(228)))
    df = np.array(df).transpose()
    ts.append(df)
ts = np.array(ts)
ts_mean = np.mean(ts, axis=0)

for i in range(1):
    for j in cand_list[i]:
        print((i, j))
        for k in range(34):
            print('for %ith file:' % k, ccf(ts[k, i, :], ts[k, j, :])[:12])
plt.fill_between(test.index, confidence_intervals[0], confidence_intervals[1], color='lightgrey')
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Model AR')
plt.xlabel('Data')
plt.ylabel('Zużycie [MW]')
plt.legend(['test', 'AR({})'.format(lag)])
plt.grid()
f.savefig(PATH_TO_PLOTS + '/timeSeriesPredTest.pdf', bbox_inches='tight')
plt.show()

# plot cross-correlation
f = plt.figure()
plt.plot(ccf(test, y_pred, unbiased=True))
plt.title('Korelacja wzajemna szeregów test i pred')
plt.xlabel('Opóźnienie')
plt.ylabel('Korelacja')
plt.grid()
f.savefig(PATH_TO_PLOTS + '/crosscorrelation.pdf', bbox_inches='tight')
plt.show()

# plot residuals
res = y_pred - test
f = plt.figure()
plt.plot(res)
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Residua dla modelu AR({})'.format(lag))
plt.xlabel('Data')
plt.ylabel('Residua')