Example #1
def plot_gibbs_correlations(skills_history,
                            mean_skills,
                            names,
                            players,
                            maxlags=100,
                            filename=DEFAULT_FILENAME,
                            figsize=None):
    if figsize is not None: plt.figure(figsize=figsize)
    else: plt.figure()

    for p in players:
        trace = skills_history[p] - mean_skills[p]
        plt.xcorr(trace,
                  trace,
                  maxlags=maxlags,
                  usevlines=False,
                  linestyle="-",
                  marker=None,
                  alpha=0.7)
    plt.grid(True)
    plt.legend(["{} ({})".format(names[p], p + 1) for p in players])
    plt.xlabel("Lag")
    plt.ylabel("Auto-correlation")
    plt.title("Auto-correlations for skill traces found using Gibbs ranking")
    plt.savefig(filename)
    plt.close()
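A minimal, self-contained sketch of the same autocorrelation pattern (a synthetic AR(1) chain stands in for a real Gibbs skill trace; the names and figure handling here are illustrative only):

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical trace: an AR(1) chain used in place of a real Gibbs skill trace.
rng = np.random.default_rng(0)
trace = np.zeros(2000)
for t in range(1, trace.size):
    trace[t] = 0.9 * trace[t - 1] + rng.normal()
trace -= trace.mean()  # demean, as the example does with mean_skills

# plt.xcorr of a trace against itself gives its auto-correlation up to maxlags.
plt.xcorr(trace, trace, maxlags=100, usevlines=False, linestyle="-", marker=None)
plt.xlabel("Lag")
plt.ylabel("Auto-correlation")
plt.show()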
Example #2
def plot_xcorr(tx_lfm, rx_lfm):
    plt.figure(0)
    plt.xcorr(x=rx_lfm[:, 0] + 1j * rx_lfm[:, 1],
              y=tx_lfm[:, 0] + 1j * tx_lfm[:, 1])
    plt.xlabel('Time (s)')
    plt.ylabel('Magnitude')
    plt.show(block=False)
Example #3
def prediction_cross_correlation(real_data, test_data):
    assert isinstance(real_data, pd.Series) and isinstance(test_data, pd.Series)

    plt.title('Cross correlation')
    intersect = test_data.index.intersection(real_data.index)
    df_intersect = real_data.loc[intersect]
    # align both series on the shared index so x and y have equal length
    plt.xcorr(df_intersect.values, test_data.loc[intersect].values, usevlines=True, normed=True)
def cross_correlogram(spike_mat, ind_clu_1, ind_clu_2, ftsize=22):
    gs = plt.GridSpec(40, 20)
    plt.subplot(gs[0:6, 10:14])
    plt.title('Cc-gram, clusters ' + str(ind_clu_1) + ' and ' + str(ind_clu_2),
              fontsize=ftsize)
    plt.xcorr(spike_mat[ind_clu_1, :], spike_mat[ind_clu_2, :], normed=True)
    plt.xlabel('dt (ms)', fontsize=ftsize)
    plt.xticks(fontsize=ftsize)
    plt.yticks(fontsize=ftsize)
def do_xcorr():  # cross-correlation
    dataset = np.array(loadData())
    x1 = dataset[:, 0]
    x2 = dataset[:, 1]

    plt.xcorr(x1, x2, usevlines=True, maxlags=50, normed=True, lw=2)
    plt.grid(True)

    plt.show()

    return
def cross_correlation(row: np.array, data_predicted: np.array):
    fig, ax = plt.subplots(1, 1)
    train = np.array([range(0, NO_POINTS), row[0:NO_POINTS]]).T.astype(float)
    test = np.array([range(NO_TRAIN, NO_POINTS),
                     list(data_predicted)]).T.astype(float)
    plt.xcorr(train[-NO_TEST:, 1],
              test[-NO_TEST:, 1],
              maxlags=8,
              usevlines=False)
    plt.axvline(x=0, color='k', linestyle='--')
    plt.title("Cross-correlation between the predicted and real values")
    plt.xlabel('Sequence lags')
    plt.legend()
Example #7
def random_torque():
    """ Read the entire control table and randomly sampled torque commands to the DXL.

    This is done 'N' times and timed. Relevant data is plotted.
    """
    write_torque_mode_enable(1)
    read_block = dxl_mx64.MX64.subblock('version_0', 'goal_acceleration', ret_dxl_type=use_ctypes_driver)
    times = []
    vals_dict = {'present_pos': 2 * pi/3.0, 'current': 0}
    actions = []
    currents = []
    for i in range(1000):
        t1 = time.time()
        if vals_dict['present_pos'] < pi/3.0:
            # write_torque_mode_enable(0)
            # write_pos(100)
            action = 1000
            write_torque(action)
            time.sleep(0.001)
            # write_torque_mode_enable(1)
        elif vals_dict['present_pos'] > pi:
            # write_torque_mode_enable(0)
            # write_pos(120)
            action = -1000
            write_torque(action)
            time.sleep(0.001)
            # write_torque_mode_enable(1)
        else:
            action = int(np.random.uniform(-1, 1)*1000)
        #action = 0 #1000
        write_torque(action)
        # time.sleep(0.001)
        # print("action: ", action)
        # print("pos: ", vals_dict['present_pos'])
        # print("current: ", vals_dict['current'])
        vals_dict = read(read_block)
        actions.append(action)
        currents.append(vals_dict['current'])
        times.append(time.time() - t1)
    write_torque(0)
    print(np.mean(times))
    print(currents[:10])
    plt.xcorr(currents, actions)
    #print(np.corrcoef(actions[:-1], currents[1:])[0, 1])
    plt.figure()
    plt.plot(np.cumsum(times), actions, label='actions')
    plt.plot(np.cumsum(times), currents, label='currents')
    plt.legend()
    plt.figure()
    plt.plot((times))
    plt.show()
Example #8
def random_torque(driver, port, idn):
    """ Read the entire control table and randomly sampled torque commands to the DXL.

    This is done 'N' times and timed. Relevant data is plotted.
    """
    dxl.write_torque_mode_enable(driver, port, idn, 1)

    times = []
    vals_dict = {'present_pos': 2 * pi / 3.0, 'current': 0}
    actions = []
    currents = []
    for i in range(1000):
        t1 = time.time()
        if vals_dict['present_pos'] < pi / 3.0:
            # write_torque_mode_enable(0)
            # write_pos(100)
            action = 1000
            dxl.write_torque(driver, port, idn, action)
            time.sleep(0.001)
            # write_torque_mode_enable(1)
        elif vals_dict['present_pos'] > pi:
            # write_torque_mode_enable(0)
            # write_pos(120)
            action = -1000
            dxl.write_torque(driver, port, idn, action)
            time.sleep(0.001)
            # write_torque_mode_enable(1)
        else:
            action = int(np.random.uniform(-1, 1) * 1000)
        # action = 0 #1000
        dxl.write_torque(driver, port, idn, action)
        # time.sleep(0.001)
        # print("action: ", action)
        # print("pos: ", vals_dict['present_pos'])
        # print("current: ", vals_dict['current'])
        vals_dict = dxl.read_vals(driver, port, idn)
        actions.append(action)
        currents.append(vals_dict['current'])
        times.append(time.time() - t1)
    dxl.write_torque(driver, port, idn, 0)
    print(np.mean(times))
    print(currents[:10])
    plt.xcorr(currents, actions)
    # print(np.corrcoef(actions[:-1], currents[1:])[0, 1])
    plt.figure()
    plt.plot(np.cumsum(times), actions, label='actions')
    plt.plot(np.cumsum(times), currents, label='currents')
    plt.legend()
    plt.figure()
    plt.plot((times))
    plt.show()
def plot_xcorr(mean_right, mean_left, moved_right, moved_left, delta=0):
    plt.figure('plt.xcorr with delta {}'.format(delta))
    plt.subplot(121)
    plt.xcorr(mean_left, moved_left)
    plt.title('Left')
    plt.xlabel('lags')
    plt.ylabel('cross-correlation')
    plt.subplot(122)
    plt.xcorr(mean_right, moved_right)
    plt.title('Right')
    plt.xlabel('lags')
    plt.ylabel('cross-correlation')

    plt.show()
Example #10
def xcor(file1, file2):
	"""
	file1: The original sound 
	file2: The distorted/delayed sound 
	"""
	original_data = read(file1)
	delayed_data = read(file2)

	print ("original data length: ", len(original_data[1]))
	print ("delayed data length: ", len(delayed_data[1]))

	if delayed_data[0] != 44100:
		print ('ERROR SR {}'.format(delayed_data[0]))
		return

	if len(delayed_data[1]) < len(original_data[1]):
		delayed = delayed_data[1]
		original = original_data[1][:len(delayed)]
	else:
		original = original_data[1]
		delayed = delayed_data[1][:len(original)]
	
	print ("adjusted original length: ", len(original))
	print ("adjusted delayed length: ", len(delayed))
	
	corr = correlate(delayed, original, "full")
	conv = convolve(delayed, np.flipud(original), "full")
	shift = len(delayed)
	lag = np.argmax(corr) - shift + 1

	print ("lag: ", lag)
	# print (corr)
	# print (len(corr))
	# print (conv)
	# print (len(conv))

	plt.xcorr(delayed, original, usevlines=True, maxlags=None, normed=True, lw=1.5)
	plt.grid(True)
	plt.axis([-shift//4,shift, -1, 1])
	# plt.axvline(np.argmax(corr)-shift+1, color='red')
	name1 = file1.split('/')[-1]
	name2 = file2.split('/')[-1]
	title = "{0} vs. {1}".format(name1, name2)
	plt.xlabel('samples')
	plt.text(lag, -0.58, 'argmax', horizontalalignment='center', color="red")
	plt.text(lag, -0.66, lag, horizontalalignment='center', color="red")
	plt.title(title)
	plt.show()
	
	return lag
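The lag recovery used above (argmax of the full cross-correlation, shifted by the signal length) can be checked on synthetic data; a small sketch with made-up signals instead of WAV files:

import numpy as np
from scipy.signal import correlate

# Hypothetical data: white noise and a copy delayed by 30 samples.
rng = np.random.default_rng(1)
original = rng.normal(size=1000)
true_delay = 30
delayed = np.concatenate([np.zeros(true_delay), original[:-true_delay]])

# Same recipe as xcor(): full cross-correlation, then argmax gives the lag.
corr = correlate(delayed, original, "full")
shift = len(delayed)
lag = np.argmax(corr) - shift + 1
print("estimated lag:", lag)  # expected: 30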
Example #11
def lag_Max(signal1, signal2, label1, label2, id):
    x = xcorr(signal1, signal2, maxlags=15)

    lags = x[0]
    c = x[1]

    print("lags", lags)
    print("c", c)

    plt.title("Cross-Correlation")
    plt.stem(lags, c, use_line_collection=True)
    plt.grid()
    plt.xlabel("Lag")
    plt.ylim(0, 1.1)
    plt.savefig("Plot_Participant/Lag/" + label1 + "_" + label2 + "_" + id +
                ".png")
    plt.close()

    i = np.argmax(np.abs(c))
    lagDiff = lags[i]
    print("lag otimo:", lagDiff)

    # plot loss during training

    return 0
Example #12
def corrpitch(data, fs):

    energyf = np.zeros(len(data))
    f0 = np.zeros(len(data))
    # f0 and energyf initialisation; energyf is already a vector of zeros, so no else branch is needed after the if

    threshold = 8
    # the threshold was found by using the pitch function with the visualisation

    for k in range(0, len(data)):
        energyf[k] = energy(data[k])
        #check Energy.py to see how we calculate the energy (here for one frame)

        c = plt.xcorr(data[k], data[k], maxlags=50)
        # xcorr gives the autocorrelation of a specific frame
        x = find_peaks(c[1])

        if energyf[k] > threshold:
            f0[k] = (x[0][int(len(x[0]) / 2)] -
                     x[0][int(len(x[0]) / 2) - 1]) * 16
        # difference between the two peaks

    # this function is not working as it should; still, F0 values exist when the energy of the frame is over the threshold,
    # but their values seem bad

    return energyf, f0
Example #13
def lag_Max(signal1, signal2, label1, label2, id):
    x = xcorr(signal1, signal2, maxlags=10)
    lags = x[0]
    c = x[1]

    i = np.argmax(np.abs(c))
    lagDiff = lags[i]

    extract_peak(lagDiff, signal1, signal2, label1, label2, id)
Example #14
def autocorrelation(signal, samplefreq, fmin=50):
    n = math.ceil(samplefreq / fmin)
    tab = plt.xcorr(signal, signal, False, maxlags=n)
    x = tab[0]
    c = tab[1]
    period = find_distance(x, c)
    if (period == None):
        return None
    else:
        return (1 / period) * samplefreq
def analyse_input_data():
    data, flight_data = load_new_mensa_data(k=20)
    ypr = data[:,:3]
    w4 = data[:,-4:]
    
    #plot_hist(ypr, bins=100, labels=['Yaw', 'Pitch', 'Roll'])
    #plot_hist(w4, bins=100, labels=['w1', 'w2', 'w3', 'w4'])
    
    #pitch/forward-backward-tilt
    pitch_data = ypr[:,1]
    roll_data = ypr[:,2]
    lr_tilt_data = w4[:,0]
    fb_tilt_data = w4[:,1]
    #pitch_corr = np.correlate(pitch_data, fb_tilt_data, 'same')
    #lag_max = len(pitch_data)/2
    #plt.plot(arange(-lag_max,lag_max+1), pitch_corr)
    #plt.xcorr(fb_tilt_data, pitch_data, maxlags=20)
    plt.xcorr(lr_tilt_data, roll_data, maxlags=20)
    plt.xlabel('lags')
    plt.ylabel('correlation')
    plt.show()
Example #16
def cross_correlation_plot(feature_one, feature_two):
    feature_one = feature_one - feature_one.mean()
    feature_two = feature_two - feature_two.mean()
    cross_correlation = correlate(feature_one, feature_two)
    cross_correlation /= (len(feature_one) * feature_one.std() *
                          feature_two.std())
    plt.xcorr(feature_one, feature_two, maxlags=5)
    absolute_cross_correlation = abs(cross_correlation)
    print("Max cross correlation", cross_correlation.max())
    print("Average cross correlation", cross_correlation[:20].mean())
    if is_significant(cross_correlation):
        statistically_significant = True
        print("and is statistically significant")
    else:
        statistically_significant = False
        print("and is not statistically significant")
    print()
    plt.show()
    cross_correlation = pd.Series(cross_correlation)
    cross_correlation.index = range(len(cross_correlation))
    return cross_correlation, statistically_significant
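The normalization above (demean, then divide the raw cross-correlation by N·std(x)·std(y)) reduces to the Pearson coefficient at zero lag; a quick self-contained check, leaving out the example's is_significant helper:

import numpy as np
from scipy.signal import correlate

rng = np.random.default_rng(2)
a = rng.normal(size=500)
b = 0.8 * a + 0.2 * rng.normal(size=500)  # two related series

# Demean, correlate, and normalize by N * std(a) * std(b), as in the example.
a0 = a - a.mean()
b0 = b - b.mean()
cc = correlate(a0, b0) / (len(a0) * a0.std() * b0.std())

# The middle of the 'full' output is zero lag; it equals the Pearson coefficient.
print("normalized value at zero lag:", cc[len(a0) - 1])
print("np.corrcoef reference:       ", np.corrcoef(a, b)[0, 1])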
Example #17
def ts_crosscorrelation(data,
                        first_col,
                        second_col,
                        diff=True,
                        m_lags=None,
                        plot=True):
    aux_df = data.copy()

    if diff is True:
        aux_df[first_col] = aux_df[first_col].pct_change()
        aux_df[second_col] = aux_df[second_col].pct_change()
        aux_df = aux_df.replace([np.inf, -np.inf], np.nan)
        aux_df = aux_df.dropna(subset=[first_col, second_col])

    if diff is True:
        print('Lag 0 Pearson correlation differentiated (pct change): ',
              pearsonr(aux_df[first_col], aux_df[second_col]))
    else:
        print('Lag 0 Pearson correlation NOT differentiated: ',
              pearsonr(aux_df[first_col], aux_df[second_col]))

    if m_lags is None:
        m_lags = int(len(aux_df[first_col]) / 10)
        if m_lags < 20:
            m_lags = 20

    if plot is True:
        fig = plt.figure(figsize=(15, 10))
        plt.xcorr(aux_df[first_col], aux_df[second_col], maxlags=m_lags)
        plt.title('Cross Correlation Plot Between ' + first_col + ' and ' +
                  second_col)
        fig.show()

    lags, corr, lines, b = plt.xcorr(aux_df[first_col],
                                     aux_df[second_col],
                                     maxlags=m_lags)
    res = pd.DataFrame(lags, columns=['lags'])
    res['corr'] = corr

    return res
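A hedged usage sketch for ts_crosscorrelation, assuming the function above is importable together with its dependencies (pandas, numpy, scipy.stats.pearsonr, matplotlib); the column names and the 3-period shift are made up:

import numpy as np
import pandas as pd

# Hypothetical input: 'price_b' follows 'price_a' with a 3-period delay.
rng = np.random.default_rng(3)
a = pd.Series(np.cumsum(rng.normal(size=300)) + 100.0)
b = a.shift(3) + rng.normal(scale=0.1, size=300)
df = pd.DataFrame({"price_a": a, "price_b": b}).dropna()

res = ts_crosscorrelation(df, "price_a", "price_b", diff=True, m_lags=20, plot=False)
# The row with the largest coefficient should sit near the 3-period shift
# (its sign follows matplotlib's xcorr lag convention).
print(res.loc[res["corr"].idxmax()])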
def cross_correl_2variable(var_in, num_lag, dicinp, plotpath):
    """
    Plots and saves the linear global cross-correlation of a track using
    matplotlib.pyplot.xcorr()
    
    Args:
        var_in = dictionary with variables (keys=variable name, values=1D ndarray)
        num_lag = positive integer with the maximum number of lags
        dicinp = dictionary with info about the graph {'plttitle': plot title,
           'filename':name of file}
        plotpath = string with the path of the plot
    Returns:
        None
    """
    # Assign to variables
    ssh = var_in['SRAL']
    slstr = var_in['SLSTR']

    font = {'size': 18}
    plt.rc('font', **font)

    # Cross-correlation
    fig = plt.figure(figsize=(18, 10))
    # cross-correlation x1 vs x2
    plt.xcorr(ssh, slstr, usevlines=True, maxlags=num_lag, normed=True, lw=2)
    plt.title(dicinp['plttitle'], fontsize=23)
    plt.xlabel('Lag [# elements]', fontsize=18)
    plt.ylabel('Cross-Corr', fontsize=18)
    plt.ylim([-1, 1])
    plt.xlim([-num_lag, num_lag])
    plt.grid(True)

    plt.savefig(plotpath + '\\' + dicinp['filename'] + '.png',
                dpi=300,
                bbox_inches='tight')

    plt.close('all')

    return None
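A hedged usage sketch for cross_correl_2variable with synthetic along-track data; the dictionary keys follow the docstring, while the values, title, and (Windows-style) output path are made up:

import numpy as np

# Hypothetical tracks: SLSTR leads SRAL by 5 elements, plus a little noise.
rng = np.random.default_rng(4)
slstr = rng.normal(size=400)
ssh = np.roll(slstr, 5) + 0.1 * rng.normal(size=400)

var_in = {'SRAL': ssh, 'SLSTR': slstr}
dicinp = {'plttitle': 'SRAL vs SLSTR cross-correlation',
          'filename': 'sral_slstr_xcorr'}
cross_correl_2variable(var_in, num_lag=20, dicinp=dicinp, plotpath=r'C:\plots')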
Example #19
def plotXCorrel(data1,data2,units,title,filename):
    """
    A function to create cross-correlation charts.  95%
    confidence intervals are generated: +- 2/sqrt(n)

    Parameters:
    -----------
    data1: the array that remains static (i.e. not shifted)
    data2: the array that is shifted
    units : string.  Units of time that are being plotted (e.g. months, years).
    """

    confid_int_a = 2.0/(math.sqrt(len(data1)))
    confid_int_b = -2.0/(math.sqrt(len(data1)))

    plt.xcorr(data1,data2,maxlags=None)
    plt.ylabel('Cross-correlation [-1,1]')
    plt.xlabel('Lag ('+units+')')
    plt.axhline(y = confid_int_a,ls='dashed')
    plt.axhline(y = confid_int_b,ls='dashed')
    plt.title(title)
    savefig(filename)
    plt.close()
    return
Example #20
def plotXCorrel(data1, data2, units, title, filename):
    """
    A function to create cross-correlation charts.  95%
    confidence intervals are generated: +- 2/sqrt(n)

    Parameters:
    -----------
    data1: the array that remains static (i.e. not shifted)
    data2: the array that is shifted
    units : string.  Units of time that are being plotted (e.g. months, years).
    """

    confid_int_a = 2.0 / (math.sqrt(len(data1)))
    confid_int_b = -2.0 / (math.sqrt(len(data1)))

    plt.xcorr(data1, data2, maxlags=None)
    plt.ylabel('Cross-correlation [-1,1]')
    plt.xlabel('Lag (' + units + ')')
    plt.axhline(y=confid_int_a, ls='dashed')
    plt.axhline(y=confid_int_b, ls='dashed')
    plt.title(title)
    savefig(filename)
    plt.close()
    return
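The ±2/√n band used above is the usual approximate 95% confidence interval for the cross-correlation of white noise; a standalone sketch of the same idea without the file output:

import math
import numpy as np
import matplotlib.pyplot as plt

# Two unrelated white-noise series should stay mostly inside +/- 2/sqrt(n).
rng = np.random.default_rng(5)
data1 = rng.normal(size=400)
data2 = rng.normal(size=400)

plt.xcorr(data1, data2, maxlags=50)
band = 2.0 / math.sqrt(len(data1))
plt.axhline(y=band, ls='dashed')
plt.axhline(y=-band, ls='dashed')
plt.ylabel('Cross-correlation [-1,1]')
plt.xlabel('Lag (samples)')
plt.show()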
Example #21
def autocorrelation(audiopath):
    # Initialisation
    sample_rate, samples_int = read(audiopath)
    samples = np.array(samples_int)
    samples = mono(samples)
    threshold = 2.5
    width = 30
    step = 10
    maxl = (width * sample_rate / 1000) - 1
    maxl = int(maxl)
    # Normalize and split signal
    norm_samples = normalize(samples)
    frames = split(norm_samples, sample_rate, width, step)
    # Compute energy for each frame
    energy = []
    for i in range(len(frames)):
        energy.append(compute_energy(frames[i]))
    # Sort voiced and unvoiced
    to_check = np.array(np.zeros(len(energy)))
    for i in range(len(frames)):
        if energy[i] > threshold:
            to_check[i] = 1
    # Compute autocorrelation of each frame
    autocorrs = []
    for i in range(len(frames)):
        if to_check[i] == 1:
            a, tmp, b, c = plt.xcorr(frames[i], frames[i], maxlags=maxl)
            tmp = tmp[maxl:]
            autocorrs.append(tmp)
        else:
            autocorrs.append(np.zeros(maxl + 1))
    # Find distances between the two highest peaks for each autocorrelated frame
    peaks = []
    for i in range(len(autocorrs)):
        if to_check[i] == 1:
            peaks_tmp, peaks_tmp_prop = find_peaks(autocorrs[i], height=0)
            index_max = peaks_tmp[np.argmax(peaks_tmp_prop["peak_heights"])]
            peaks.append(index_max)
        else:
            peaks.append(0)
    # Compute fundamental frequencies from distances between peaks and sample rate
    f_zeros = []
    for i in range(len(peaks)):
        if to_check[i] == 1 and peaks[i] != 0:
            f_zeros.append(sample_rate / peaks[i])
        else:
            f_zeros.append(0)
    return f_zeros
def estimateF0(xFrame, fs):
    """autocorr Based"""

    F0 = 0
    lags, c, line, b = plt.xcorr(
        xFrame, xFrame, normed=True, usevlines=True,
        maxlags=320)  # c = correlation values (the y-axis); 50 Hz => 320 lags
    plt.title("xcorr empilement")
    # odd display: they all pile on top of each other?
    peaks, _ = signal.find_peaks(c)
    middleIndex = int((np.size(peaks) - 1) / 2)
    if (np.size(peaks) != 1):
        T0 = (peaks[middleIndex + 1] - peaks[middleIndex])
        F0 = 1 / T0 * fs

    return F0
Example #23
def xCorrF0(xFrame,fs):
    
    """  returns fundamental frequency of a frame using xcorr
        arguments: xFrame = one frame of the signal
                    fs= sampling frequency
            returns F0: fundamental frequency of the frame"""
    
    F0=0
    lags, c, line, b = plt.xcorr(xFrame, xFrame, normed=True, usevlines=True, maxlags=320)  # c = correlation values (the y-axis); 50 Hz => 320 lags
    peaks, _=signal.find_peaks(c)
    middleIndex = int((np.size(peaks) - 1)/2)
    if(np.size(peaks) != 1):
        F0=(peaks[middleIndex+1]-peaks[middleIndex])
        """ peaks donne les positions des peaks ! donc position milieu +1 - position du milieu = f0 """
        
    return F0
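A self-contained check of the peak-spacing idea on a pure tone; note that the spacing between autocorrelation peaks is a period in samples, so it is converted to Hz here (the function above returns the raw spacing):

import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

fs = 16000                    # hypothetical sampling frequency, Hz
f_true = 200.0                # hypothetical fundamental
t = np.arange(640) / fs       # one 40 ms frame
frame = np.sin(2 * np.pi * f_true * t)

lags, c, line, b = plt.xcorr(frame, frame, normed=True, usevlines=True, maxlags=320)
peaks, _ = signal.find_peaks(c)
middleIndex = int((np.size(peaks) - 1) / 2)
period_samples = peaks[middleIndex + 1] - peaks[middleIndex]  # peak spacing
print("estimated F0:", fs / period_samples, "Hz")             # ~200 Hz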
Example #24
def correlate(audio1, audio2):
    # TODO find out whether audio1 highly correlates with audio2
    fs1, sig1 = wavfile.read(audio1)
    fs2, sig2 = wavfile.read(audio2)
   
    shortWidth = 10
    shortLen = len(sig1) / shortWidth
   
    sig1Short = array([sig1[shortWidth * i] for i in xrange(shortLen)])
    sig2Short = array([sig2[shortWidth * i] for i in xrange(shortLen)])
    
    sig1Norm = pcm2float(sig1Short, 'float32')
    sig2Norm = pcm2float(sig2Short, 'float32')
    
    sig1fft = fft.fft(sig1Norm)
    sig2fft = fft.fft(sig2Norm)
    #lags, c, line, b = plt.xcorr(sig1Norm, sig2Norm, maxlags=None)
    lag, c, line, b = plt.xcorr(absolute(sig1fft),absolute(sig2fft))
    maxC = amax(c)
    print("max correlation is %f" % maxC)
    return maxC
Example #25
def plot_cross_corr(y: pd.Series, x: pd.Series, max_lag=12):

    # statistical test and plot of the correlation coefficients
    # fig = plt.figure()
    # ax1 = fig.add_subplot(111)
    # ax1.xcorr(x, y, usevlines=True, maxlags=max_lag, normed=True)
    # ax1.grid(True)
    # ax1.axhline(0, color='black')
    lags, c, _, _ = plt.xcorr(x=x,
                              y=y,
                              usevlines=True,
                              maxlags=max_lag,
                              normed=True)
    plt.scatter(lags, c, marker='o')
    # plt.xlabel('Lags of {0} corresponding to {1}'.format(x.name, y.name))
    # plt.title('Cross Correlation Coefficients')
    plt.xlabel('{0} 相对于 {1} 的滞后期'.format(x.name, y.name))
    plt.ylim(-1, 1)
    plt.title('时差相关系数')
    # plt.show()

    return lags, c
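A hedged usage sketch for plot_cross_corr, assuming the function above is in scope with its imports; the series names and the 2-period lead are made up:

import numpy as np
import pandas as pd

# Hypothetical monthly indicators: 'leading' runs 2 periods ahead of 'target'.
rng = np.random.default_rng(6)
base = rng.normal(size=120)
target = pd.Series(base, name='target')
leading = pd.Series(np.roll(base, -2) + 0.1 * rng.normal(size=120), name='leading')

lags, c = plot_cross_corr(target, leading, max_lag=12)
print("strongest lag:", lags[np.argmax(np.abs(c))])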
Example #26
fig_3 = well_name + ' filtered data O1'
fig_4 = well_name + ' filtered data M2'
fig_5 = well_name + ' correlations '

#multipage pdf figures
pp = PdfPages('fs'+os.path.splitext(wlfile)[0]+'.pdf')


#figure 2
fig_2 = well_name + ' TA-TB'
plt.xkcd()
plt.figure(fig_2)
plt.suptitle(fig_2, x=0.2, fontsize='small')
plt.title(os.path.splitext(wlfile)[0])
plt.subplot(2,1,1)
plt.xcorr(ta_O1[1],tb_O1[1],maxlags=20)
plt.ylim(-1.1,1.1)
plt.tick_params(which='both',labelsize=8)
plt.xlabel('lag (hrs)',fontsize='small')
plt.ylabel('correl',fontsize='small')
plt.title('Cross Correl O1',fontsize='small')
plt.subplot(2,1,2)
plt.xcorr(ta_M2[1],tb_M2[1],maxlags=20)
plt.ylim(-1.1,1.1)
plt.tick_params(which='both',labelsize=8)
plt.xlabel('lag (hrs)',fontsize='small')
plt.ylabel('correl',fontsize='small')
plt.title('Cross Correl M2',fontsize='small')
plt.tight_layout()
pp.savefig()
plt.close()
plt.plot(lag_time, lagmod, 'r--', label='wl modeled w bp&et')
plt.legend(loc=4, fontsize='small')
plt.xlim(0,lag)
plt.ylabel('change (ft)')
plt.xlabel('time (hrs)')
plt.tight_layout()
pp.savefig()
plt.close()

#figure 2
fig_2 = well_name + ' signal processing'
plt.figure(fig_2)
plt.suptitle(fig_2, x=0.2, fontsize='small')
plt.title(os.path.splitext(wlfile)[0])
plt.subplot(2,1,1)
plt.xcorr(dl_O1[1],wl_O1[1],maxlags=500)
plt.ylim(-1.1,1.1)
plt.tick_params(which='both',labelsize=8)
plt.xlabel('lag (min)',fontsize='small')
plt.ylabel('correl',fontsize='small')
plt.title('Cross Correl O1',fontsize='small')
plt.subplot(2,1,2)
plt.xcorr(dl_M2[1],wl_M2[1],maxlags=500)
plt.ylim(-1.1,1.1)
plt.tick_params(which='both',labelsize=8)
plt.xlabel('lag (min)',fontsize='small')
plt.ylabel('correl',fontsize='small')
plt.title('Cross Correl M2',fontsize='small')
plt.tight_layout()
pp.savefig()
plt.close()

r_ac = sig.fftconvolve(r_24hr[0], r_24hr[0], mode = 'same')

r_ac = np.ones(r_24hr.shape);
for wid in range(nworms):
  r_ac[wid,:] =  sig.fftconvolve(r_24hr[wid], r_24hr[wid], mode = 'same'); 

r_ac_avg = np.nanmean(r_ac, axis = 0)


fig = plt.figure(799); plt.clf();
cc = [];
for wid in range(nworms):
  dd = r_24hr[wid, transitions_cor[wid,0]:transitions_cor[wid,-1]];
  (l, c, a,b) = plt.xcorr(dd, dd, maxlags = 600, usevlines = False, marker = '.');
  cc.append(c);
  
cc_bs = [];
for wid in range(nworms):
  dd = r_24hr[wid, transitions_cor[wid,0]:transitions_cor[wid,-1]];
  (l, c, a,b) = plt.xcorr(dd, np.random.permutation(dd), maxlags = 600, usevlines = False, marker = '.');
  cc_bs.append(c);  
  

fig = plt.figure(799); plt.clf();
for c in cc:
  plt.plot(l,c, 'gray')
plt.plot(l, np.nanmean(np.array(cc), axis = 0), 'r', linewidth = 2)

for c in cc_bs:
Example #29
def xcorr(*args, **kwargs):
    r"""starkplot wrapper for xcorr"""
    return _pyplot.xcorr(*args, **kwargs)
#cormax = numpy.argmax(correl[len(correl)/2-20:len(correl)/2-20])
#print cormax

plt.figure(12)
plt.plot(xdata,yfilt_O1,'r')
plt.twinx()
plt.plot(xdata,zfilt_O1,'b')
plt.title('O1')
plt.figure(27)
plt.plot(xdata,yfilt_M2,'r')
plt.twinx()
plt.plot(xdata,zfilt_M2,'b')
plt.title('M2')
plt.figure(29)
plt.xcorr(yfilt_O1,zfilt_O1,maxlags=10)
plt.title('Cross Correl O1')
plt.figure(30)
plt.xcorr(yfilt_M2,zfilt_M2,maxlags=10)
plt.title('Cross Correl M2')
plt.draw()

###############################################################################
#
########################################################### Regression Analysis
#
###############################################################################

#define starting values
x0 = numpy.array([sum(zdata)/float(len(zdata)), 1.5, 1.5])
Example #31
def plot_idx_func():
    """
    """

    idx = get_indices()

    for k in idx:
        #for k in ['AEX']:
        ii = idx[k]  #array index, k=noun index
        fechas = ii[0]  #date index
        price = ii[1]  #price index

        #reverse fechas
        fechas = fechas[-1::-1]

        #to reverse data
        price = price[-1::-1]

        t = np.arange(len(price))

        #get date position
        pos = []

        pos.append(np.where(fechas == '05.01.2001')[0])
        pos.append(np.where(fechas == '02.01.2004')[0])
        pos.append(np.where(fechas == '02.01.2008')[0])
        pos.append(np.where(fechas == '02.01.2014')[0])
        pos.append(np.where(fechas == '02.01.2019')[0])
        pos = np.array(pos)
        pos = pos.flatten()
        #pos = pos[-1::-1]

        print(pos, k)

        #plot each index
        plt.figure()
        plt.plot(price, label=k)
        print(k)
        plt.legend(loc='upper right')
        plt.xticks(pos, fechas[pos], size='small', rotation=45)
        title('Representación índices')
        xlabel('Periodos más representativos')
        ylabel('Precios de cierre')

        #index with colours
        plt.figure()
        plt.plot(t[:pos[0]], price[0:pos[0]])
        plt.plot(t[pos[0]:pos[1]], price[pos[0]:pos[1]])
        plt.plot(t[pos[1]:pos[2]], price[pos[1]:pos[2]])
        plt.plot(t[pos[2]:pos[3]], price[pos[2]:pos[3]])
        plt.plot(t[pos[3]:pos[4]], price[pos[3]:pos[4]])
        plt.xticks(pos, fechas[pos], size='small', rotation=45)
        suptitle('Representación índices')
        title(k)
        xlabel('Periodos más representativos')
        ylabel('Precios de cierre')
        plt.tight_layout()  # ensures the x axis is visible when the figure is saved

        #segment each index
        segment1 = price[:pos[1]]
        segment2 = price[pos[1] + 1:pos[2]]
        segment3 = price[pos[2] + 1:pos[3]]
        segment4 = price[pos[3] + 1:pos[4]]

        #xcorr for segment1
        x = segment1
        y = segment1
        plt.figure()
        xcorr1 = plt.xcorr(x, y, normed=True, usevlines=True, maxlags=None)
        #plt.plot(xcorr1[0],xcorr1[1],'-')
        #title('Autocorrelación segmento 1')

        #xcorr for segment2
        x = segment2
        y = segment2
        plt.figure()
        xcorr2 = plt.xcorr(x, y, normed=True, usevlines=True, maxlags=None)
        #plt.plot(xcorr2[0],xcorr2[1],'-')
        #title('Autocorrelación segmento 2')

        #xcorr for segment3
        x = segment3
        y = segment3
        plt.figure()
        xcorr3 = plt.xcorr(x, y, normed=True, usevlines=True, maxlags=None)
        #plt.plot(xcorr3[0],xcorr3[1],'-')
        #title('Autocorrelación segmento 3')

        # ASK WHETHER, FOR THE REPORT, IF THE AUTOCORRELATIONS ARE WANTED SEPARATELY, I SHOULD INCLUDE THE FIGURE THAT COMES OUT BLACK OR ONLY THE OUTLINE
        #xcorr for segment4
        x = segment4
        y = segment4
        plt.figure()
        xcorr4 = plt.xcorr(x, y, normed=True, usevlines=True, maxlags=None)
        #plt.plot(xcorr4[0],xcorr4[1],'-')
        #title('Autocorrelación segmento 4')

        # Plot of all the autocorrelations together
        plt.figure()
        plt.plot(xcorr1[0],
                 xcorr1[1],
                 '-',
                 label=('Autocorrelación segmento 1'))
        plt.plot(xcorr2[0],
                 xcorr2[1],
                 '-',
                 label=('Autocorrelación segmento 2'))
        plt.plot(xcorr3[0],
                 xcorr3[1],
                 '-',
                 label=('Autocorrelación segmento 3'))
        plt.plot(xcorr4[0],
                 xcorr4[1],
                 '-',
                 label=('Autocorrelación segmento 4'))
        title(k)
        suptitle("Autocorrelación")
        plt.legend(loc='upper right')

        #hurst
        hurst1 = hurst(segment1)
        hurst2 = hurst(segment2)
        hurst3 = hurst(segment3)
        hurst4 = hurst(segment4)
        h = np.array([hurst1, hurst2, hurst3, hurst4])
        #plt.figure()
        #plt.plot(h,'o', label = k)
        #plt.legend(loc='upper right')
        #suptitle('Exponente de Hurst')
        #title(k)

        #spectrum, get the last value for w1a
        spectrum1 = spectrum1f(segment1)
        w1 = spectrum1[-2]
        spectrum2 = spectrum1f(segment2)
        w2 = spectrum2[-2]
        spectrum3 = spectrum1f(segment3)
        w3 = spectrum3[-2]
        spectrum4 = spectrum1f(segment4)
        w4 = spectrum4[-2]
        spect = np.array([w1, w2, w3, w4])
        #plt.figure()
        #plt.plot(h,'o', label = k)
        #plt.legend(loc='upper right')
        #suptitle('Spectrum')
        #title(k)

        # Compute the estimated h: H = (beta - 1) / 2
        h1_est = (np.abs(w1) - 1) / 2
        h2_est = (np.abs(w2) - 1) / 2
        h3_est = (np.abs(w3) - 1) / 2
        h4_est = (np.abs(w4) - 1) / 2
        h_est = np.array([h1_est, h2_est, h3_est, h4_est])

        hh, h_1f = bootstrap_index(k, segment1, segment2, segment3, segment4,
                                   h, spect)
    return price, hh, h_1f
Example #32
def beatestimate(BeatStrength, sr, hop, startbpm, beat_tightness):
    # Function:
    # Beats=beatestimate(BeatStrength,sr,hop,startbpm,...
    #        beat_tightness)
    #
    # This function estimates the beat times from the beat strength.
    #
    # INPUTS  - BeatStrength. The strength of the beat, time windowed by a stft
    #         - sr. Sample rate of BeatStrength (note this might have been
    #           resampled).
    #         - hop. number of samples hopped in BeatStrength
    #         - startbpm. The start point of the search.
    #         - beat_tightness. How tightly to stick to the startbpm.
    #
    # OUTPUTS - Beats. The beat times in seconds
    #
    # ---------------------------------------------
    # Function created by M. McVicar
    # Function revised by Y. Ni
    # Intelligent Systems Lab
    # University of Bristol
    # U.K.
    # 2011

    Beats = 0

    # 1. Estimate rough start bpm

    # Find rough global period (empirically, 0s-90s)
    duration_time = 90.0
    # in seconds
    upper_time_zone = 90.0
    # in seconds
    bpm_std = 0.7
    # the variance of the bpm window
    alpha = 0.8
    # an update weight for part 3.

    # sample rate for specgram frames (due to the hop_length)
    import numpy as np

    fftres = np.true_divide(sr, hop)

    # Get the lower bound and the upper bound in the beat strength vector
    maxcol = int(min(np.round(upper_time_zone * fftres), len(BeatStrength) - 1))
    mincol = int(max(1, maxcol - np.round(duration_time * fftres)))

    # Use auto-correlation out of 4 seconds (empirically set?)
    acmax = int(np.round(4 * fftres))

    # Get autocorrelation of signal
    import matplotlib.pyplot as plt

    # matlab auto zero-pads, python doesn't
    if BeatStrength.shape[0] >= acmax:
        rrr = plt.xcorr(
            BeatStrength[range(mincol - 1, maxcol)],
            BeatStrength[range(mincol - 1, maxcol)],
            normed=False,
            maxlags=acmax,
        )
        xcr = rrr[1]
    else:
        maxlaglen = len(BeatStrength[range(mincol - 1, maxcol)]) - 1
        rrr = plt.xcorr(
            BeatStrength[range(mincol - 1, maxcol)],
            BeatStrength[range(mincol - 1, maxcol)],
            normed=False,
            maxlags=maxlaglen,
        )
        xcr = rrr[1]
        des_len = acmax * 2 + 1
        npad = (des_len - len(xcr)) // 2  # integer length so np.zeros accepts it
        xcr = np.hstack([np.zeros(npad), xcr, np.zeros(npad)])

    # Find local max in the global auto-correlation
    rawxcr = xcr[range(acmax, 2 * acmax + 1)]
    # The right side of correlation part

    # Creating a hamming like window around default bpm
    bpms = 60.0 * np.true_divide(fftres, (np.add(range(acmax + 1), 0.1)))

    num = np.log(np.true_divide(bpms, startbpm)) * bpm_std
    denom = np.log(2.0)
    div = np.true_divide(num, denom)
    xcrwin = np.exp(-0.5 * div ** 2.0)

    # The weighted auto-correlation
    xcr = rawxcr * xcrwin

    # %Add in 2x, 3x, choose largest combined peak
    # lxcr = length(xcr);
    # xcr00 = [0, xcr, 0];
    # xcr2 = xcr(1:ceil(lxcr/2))+.5*(.5*xcr00(1:2:lxcr)+xcr00(2:2:lxcr+1)+.5*xcr00(3:2:lxcr+2));
    # xcr3 = xcr(1:ceil(lxcr/3))+.33*(xcr00(1:3:lxcr)+xcr00(2:3:lxcr+1)+xcr00(3:3:lxcr+2));
    #
    #
    # %Get the bpm position of the peak
    # if max(xcr2) > max(xcr3)
    #   [vv, startpd] = max(xcr2);
    #   startpd = startpd -1;
    #   startpd2 = startpd*2;
    # else
    #   [vv, startpd] = max(xcr3);
    #   startpd = startpd -1;
    #   startpd2 = startpd*3;
    # end

    # %Get the local max (the picks)
    xpks = localmax(xcr)

    # Not include any peaks in first down slope (before goes below
    # zero for the first time)
    xpks[range(np.min(np.nonzero(xcr < 0)))] = 0

    # Largest local max away from zero
    maxpk = np.max(xcr[xpks])

    # Find the position of the first largest pick
    z = np.nonzero((xpks * xcr) == maxpk)
    startpd = z[0]
    startpd = startpd[0]

    # Choose best peak out of .33 .5 2 3 x this period
    candpds = np.round(np.multiply([0.33, 0.5, 2.0, 3.0], startpd)).astype(int)
    candpds = candpds[candpds < acmax]

    bestpd2 = np.argmax(xcr[candpds]) + 1
    # to match matlab
    startpd2 = candpds[bestpd2]

    # %Weight startpd and startpd2
    # pratio = xcr(1+startpd)/(xcr(1+startpd)+xcr(1+startpd2));
    # if (pratio>0.5)
    #     startbpm=(60*fftres)/startpd;
    # else
    #     startbpm=(60*fftres)/startpd2;
    # end

    # Always use the faster one
    startbpm = np.true_divide(60.0 * fftres, np.minimum(startpd, startpd2))

    ### 1. Smooth the beat strength ###

    # BeatStrength=BeatStrength/std(BeatStrength);

    startpd = int(round(60.0 * fftres) / startbpm)
    pd = startpd

    # Smooth beat events with a gaussian window
    templt = np.exp(-0.5 * ((np.arange(-pd, pd + 1) / np.true_divide(pd, 32.0)) ** 2.0))

    # convolve the window with the BeatStrength
    import scipy.signal

    localscore = scipy.signal.fftconvolve(templt, BeatStrength)
    localscore = localscore[np.add(int(round(len(templt) / 2.0)), range(BeatStrength.shape[0]))]

    ### 2.Initialise ###

    backlink = np.zeros(localscore.shape[0])
    cumscore = np.zeros(localscore.shape[0])

    #  search range for previous beat. prange is the number of samples to look
    #  back and forward
    # prange = round(-2*pd):-round(pd/2);
    prange = np.arange(-2 * pd, -int(round(pd / 2)) + 1)

    #  Make a score window, which begins biased towards 120bpm and skewed.
    # txwt = (-beat_tightness*abs((log(prange/-pd)).^2));
    txwt = np.exp(-0.5 * (beat_tightness * (np.log(np.true_divide(prange, -pd)))) ** 2.0)

    #  'Starting' is 1 for periods of (near) silence.
    starting = 1

    ### 3 Forward step ###

    #  Main forward loop. Go through each window, padding zeros backwards if
    #  needed, and add the cumulative score to the prior (txwt).
    for i in range(localscore.shape[0]):
        # for i in range(1):

        #  Move the time window along
        timerange = np.add(i, prange)

        #  Are we reaching back before time zero?
        zpad = np.maximum(0, np.minimum(1 - timerange[0], prange.shape[0]))

        #  Search over all possible predecessors and apply transition
        #  weighting
        scorecands = txwt * np.hstack([np.zeros(zpad), cumscore[timerange[zpad:]]])

        #  Find best predecessor beat
        current_score = np.max(scorecands)
        beat_location = np.argmax(scorecands)

        #  Add on local score
        cumscore[i] = alpha * current_score + (1 - alpha) * localscore[i]

        #  special case to catch first onset. Stop if the local score is small (ie
        #  if there's near silence)
        if (starting == 1) and (localscore[i] < 0.01 * np.max(localscore)):
            backlink[i] = -1
            # default
        else:
            #  found a probable beat, store it and leave the starting/silence
            #  scenario.
            backlink[i] = timerange[beat_location]
            starting = 0

    ### 4. Get the last beat ###

    # cumscore now stores the score through the song, backlink the best
    # previous frame.

    # get the median non zero score
    maxes = localmax(cumscore)
    max_indices = np.nonzero(maxes)[0]
    peak_scores = cumscore[max_indices]

    medscore = np.median(peak_scores)

    # look for beats above 0.5 * median
    bestendposs = np.nonzero(cumscore * localmax(cumscore) > 0.5 * medscore)[0]

    # The last of these is the last beat (since the score generally increases)
    bestendx = np.max(bestendposs)

    ### 5. Backtrace ###
    # begin on last beat
    b = [int(bestendx)]

    # go while backlink is positive (we set it to be -1 in silence)
    while backlink[b[-1]] > 0:
        # append the previous beat
        b = np.hstack([b, int(backlink[b[-1]])])

    ### 6. Output ###
    # Beats are currently backwards, so flip. Also return in s (need +1)
    b = b[::-1]
    Beats = np.true_divide(np.add(b, 1), fftres)

    return Beats
Example #33
def calculateTimeDelayXCorr(s1, s2, s1Label, s2Label, timeVector, step, lagsBound=None, withPlots=True):
    """ Estimate the delay between two normalized signals by cross-correlation.
    Normalization consists of demeaning and dividing by the maximum of the rectified signal.
    From the cross-correlation signal, the maximum value within the time range 
    (``-lagsBound``, ``lagsBound``), in *s*, is found. The time instant in
    which that maximum occurs is the time delay estimation.
    If positive, ``s2`` is early with respect to ``s1``.
    
    Parameters
    ----------
    s1, s2 : np.ndarray
        Mono-dimensional arrays representing the signals to cross-correlate.
        
    s1Label, s2Label : str
        Strings for ``s1`` and ``s2`` to show in plots.
        
    timeVector : np.ndarray
        Time line (in *s*) for both original signals ``s1`` and ``s2``.
        It must contain the same number of frames as ``s1`` and ``s2``.
        
    step : float
        Resampling step for new time line for ``s1`` and ``s2``.
        The new time line goes from ``timeVector[0]`` to ``timeVector[-1]``.
        
    lagsBound : float or None
        Limiting range (in *s*) around which to search for the maximum cross-correlation value.
        If None, the whole time line will be used.

    withPlots : bool
        If True, plots of the results will be shown (in blocking mode).
    
    Returns
    -------
    float
        Estimated time delay (in *s*).
        
    """    
    
    
    # Upsample signals
    x = np.arange(np.min(timeVector), np.max(timeVector), step=step)
    f1 = interp1d(timeVector, s1)
    a = f1(x)
    f2 = interp1d(timeVector, s2)
    b = f2(x)
    
    # Normalize signals
    a = a - np.mean(a)
    a = a / np.abs(np.max(a))
    b = b - np.mean(b)
    b = b / np.abs(np.max(b))
    
    # Cross-correlate signals
    dx = x[1] - x[0]
    lags, c, line, ax = plt.xcorr(a, b, normed=False, usevlines=False, maxlags=x.shape[0]-1)
    lags = lags * dx
    
    # Find maximum into the bounds
    if lagsBound is None:
        idx = (c == np.max(c))
    else:
        cc = c.copy()
        cc[np.abs(lags) > lagsBound] = 0.
        idx = (cc == np.max(cc))
    tau = lags[idx]
    
    # Plot some data if requested
    if withPlots:
        plt.subplot(3,1,1)
        plt.plot(x, a)
        plt.title(s1Label)
        plt.subplot(3,1,2)
        plt.plot(x, b)
        plt.title(s2Label)
        plt.subplot(3,1,3)
        plt.plot(lags, c)
        plt.plot([tau,tau],[0,c[idx]],'r')
        plt.plot([tau],[c[idx]],'or')
        plt.text(1.1*tau, 1.1*c[idx], 'delay = %.3f s' % tau)
        plt.title('Cross-correlation')
        plt.grid(True)
        plt.tight_layout()
        plt.show()
        plt.close(plt.gcf())
        
    return tau
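A hedged usage sketch for calculateTimeDelayXCorr with two sinusoids on a shared time line; the 0.05 s advance, labels, and resampling step are made up. Per the docstring, a positive result means s2 is early with respect to s1:

import numpy as np

# Hypothetical signals: s2 is s1 advanced (early) by 0.05 s.
timeVector = np.linspace(0.0, 2.0, 201)   # 10 ms native sampling
delay_true = 0.05
s1 = np.sin(2 * np.pi * 2.0 * timeVector)
s2 = np.sin(2 * np.pi * 2.0 * (timeVector + delay_true))

tau = calculateTimeDelayXCorr(s1, s2, 's1', 's2', timeVector,
                              step=0.001, lagsBound=0.2, withPlots=False)
print("estimated delay:", tau)  # expected to be close to +0.05 s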
Example #34
def acorr(args, x):
    plt.ylim(0, 1)
    plt.xcorr(x, x, maxlags=safe_int(args, None))  # autocorrelation: correlate x with itself (y was undefined)
Example #35

t = numpy.linspace(t_start, t_end, t_len)
sig1 = numpy.interp(t, xls_date, y)
sig2 = numpy.interp(t, xls_date, z)

#Now sig1 and sig2 are sampled at the same points.

print(sig1)
print(sig2)

plt.figure()
#plt.plot(t,sig1)
#plt.twinx()
#plt.plot(t,sig2)
plt.xcorr(sig1,sig2)
plt.show()

"""
Rectify and smooth, so 'peaks' will stand out.
This makes big assumptions about your data;
these assumptions seem true-ish based on your plots.
"""

max_xc = 0
best_shift = 0
for shift in range(-10, 0): #Tune this search range
    xc = (numpy.roll(sig1, shift) * sig2).sum()
    if xc > max_xc:
        max_xc = xc
        best_shift = shift
Example #36
def setupData(rvSymbols,symbolTarget,quan,path):
    symbolList = [item for key in rvSymbols for item in list(rvSymbols[key].keys())]+[symbolTarget]
    data = pd.read_csv(path+'\\'+dataPath+symbolTarget+'.csv')
    combined=pd.DataFrame({'barTime':pd.to_datetime(data.barTime),
                           'mid_'+symbolTarget:data.mid,
                           'mid5_'+symbolTarget:data.midEma5,
                           'mid10_'+symbolTarget:data.midEma10,
                           'mid20_'+symbolTarget:data.midEma20,
                           'imbl': data.imbl,
                           'imbl3': data.imbl3,
                           'imbl3Chg':data.imbl3 - data.imbl3_30,
                           'imblThrough': data.imblThrough,
                           'imblSpread': data.imbl - data.imblThrough,
                           'imblCnt3': data.imblCnt3,
                           'imblCnt3Chg':data.imblCnt3 - data.imblCnt3_30,
                           'imblCnt': data.imblCnt,
                           'imblCntThrough': data.imblCntThrough,
                           'imblCntSpread': data.imblCnt - data.imblCntThrough,
                           'imblCncl': data.imblCncl,
                           'imblCnclChg':data.imblCncl - data.imblCncl_30
                           })
    if symbolTarget in Stock_Conversion_Data.keys():
        combined = stockConversion(combined,Stock_Conversion_Data[symbolTarget],'barTime')

    rawFeature = combined.columns - ['barTime','mid','flattener','date','timeStepCheck']-[col for col in combined.columns if ('future' in col)|('feature' in col)|(('pnl' in col)|('mid' in col))]
    for col in rawFeature:
        print(col, np.abs(combined[col]).quantile(quan))
        combined['feature_'+col]=np.tanh(combined[col]/np.abs(combined[col]).quantile(quan))

    for symbol in symbolList:
        if symbol!=symbolTarget:
            data = pd.read_csv(path+'\\'+dataPath + symbol + '.csv')
            data['barTime']=pd.to_datetime(data.barTime)
            if symbol in Stock_Conversion_Data.keys():
                data = stockConversion(data, Stock_Conversion_Data[symbol], 'barTime')

            combined = pd.merge(combined, pd.DataFrame({'mid_' + symbol:data.mid,
                                                        'mid5_' + symbol:data.midEma5,
                                                        'mid10_' + symbol:data.midEma10,
                                                        'mid20_' + symbol:data.midEma20,
                                                        'barTime':data.barTime}),
                                on='barTime', how='left')
    changeColume = 'mid'
    for dataStep in 1,5:
        minuteDelta = dataStep
        combined['timeStepCheck']=combined.barTime.diff(dataStep).shift(-dataStep)
        for symbol in symbolList:
            combined['future_change_'+str(dataStep)+'_'+symbol]=combined[changeColume+'_'+symbol].diff(dataStep).shift(-dataStep)
            combined.ix[combined['timeStepCheck']!=pd.Timedelta(minutes = minuteDelta),'future_change_'+str(dataStep)+'_'+symbol]=np.NaN
            combined['future_nextChange_'+str(dataStep)+'_'+symbol]=combined['future_change_'+str(dataStep)+'_'+symbol].shift(-1)

    for symbol in symbolList:
        combined['midChange_' + symbol] = combined['mid5_'+symbol] - combined['mid10_'+symbol]
        combined['midChange2_' + symbol] = combined['mid5_'+symbol] - 2 * combined['mid10_'+symbol] + combined['mid20_'+symbol]
        combined['feature_midChange_'+symbol] = np.tanh(
            combined['midChange_' + symbol] / np.abs(combined['midChange_' + symbol]).quantile(quan))
        combined['feature_midChange2_' + symbol] = np.tanh(
            combined['midChange2_' + symbol] / np.abs(combined['midChange2_' + symbol]).quantile(quan))

    #get relative value feature
    for key in rvSymbols.keys():
        comp = rvSymbols[key]
        den = np.sum(list(comp.values()))
        combined['feature_midChange_'+key] = combined[['feature_midChange_'+name for name in comp.keys()]].dot(list(comp.values()))/den
        combined['feature_midChange2_' + key] = combined[['feature_midChange2_' + name for name in comp.keys()]].dot(
            list(comp.values()))
        combined['product'] = combined['feature_midChange_' + key] * combined[
            'feature_midChange_' + symbolTarget]
        combined['absDiff'] = np.abs(combined['feature_midChange_' + key]) - np.abs(
            combined['feature_midChange_' + symbolTarget])
        combined['feature_midChangeOver_' + key] = combined['feature_midChange_' + key]
        combined.ix[(combined['product'] >= 0) & (combined['absDiff'] < 0), 'feature_midChangeOver_' + key] = 0
        combined['feature_midChangeUnder_' + key] = combined['feature_midChange_' + key] - combined[
            'feature_midChangeOver_' + key]
        combined.drop(['feature_midChange_'+name for name in comp.keys()], inplace=True, axis=1)
        combined.drop(['feature_midChange2_' + name for name in comp.keys()], inplace=True, axis=1)



    col='future_change_1_'
    for symbol in symbolList:
        if symbol!=symbolTarget:
            plt.xcorr(combined.dropna()[col+symbolTarget], combined.dropna()[col+symbol].values, normed=True, usevlines=True, maxlags=15)
        plt.title(symbolTarget + '_' + symbol)
        plt.show()
    return combined
#cormax = numpy.argmax(correl[len(correl)/2-20:len(correl)/2-20])
#print cormax

plt.figure(12)
plt.plot(xdata, yfilt_O1, 'r')
plt.twinx()
plt.plot(xdata, zfilt_O1, 'b')
plt.title('O1')
plt.figure(27)
plt.plot(xdata, yfilt_M2, 'r')
plt.twinx()
plt.plot(xdata, zfilt_M2, 'b')
plt.title('M2')
plt.figure(29)
plt.xcorr(yfilt_O1, zfilt_O1, maxlags=10)
plt.title('Cross Correl O1')
plt.figure(30)
plt.xcorr(yfilt_M2, zfilt_M2, maxlags=10)
plt.title('Cross Correl M2')
plt.draw()

###############################################################################
#
########################################################### Regression Analysis
#
###############################################################################

#define starting values
x0 = numpy.array([sum(zdata) / float(len(zdata)), 1.5, 1.5])
Example #38
def tideproc(inpfile,bpdata,edata):

    delta = 1.1562
    p = 7.692E5

    ###########################################################################
    """
    INPUT FILES ARE PUT IN BELOW
    """

    lag = 100
    tol = 0.05  #percentage of variance in frequency allowed; default is 2%
    r = 1 #well radius in inches
    Be = 0.10 #barometric efficiency
    numb = 2000 # number of values to process
    spd = 24 #samples per day hourly sampling = 24
    lagt = -6.0 #hours different from UTC (negative indicates west); UT is -7

    """
    INPUT FILES END HERE
    """
    ###########################################################################

    #frequencies in cpd
    O1 = 0.9295 #principal lunar
    K1 = 1.0029 #Lunar Solar
    M2 = 1.9324 #principal lunar
    S2 = 2.00   #Principal Solar
    N2 = 1.8957 #Lunar elliptic

    #periods in days
    P_M2 = 0.5175
    P_O1 = 1.0758

    # amplitude factors from Merritt 2004
    b_O1 = 0.377
    b_P1 = 0.176
    b_K1 = 0.531
    b_N2 = 0.174
    b_M2 = 0.908
    b_S2 = 0.423
    b_K2 = 0.115

    #love numbers and other constants from Agnew 2007
    l = 0.0839
    k = 0.2980
    h = 0.6032
    Km = 1.7618 #general lunar coefficient
    pi = math.pi #pi

    #gravity and earth radius
    g = 9.821  #m/s**2
    a = 6.3707E6 #m
    g_ft = 32.23 #ft
    a_ft = 2.0902e7 #ft/s**2

    #values to determine porosity from Merritt 2004 pg 56
    Beta = 2.32E-8
    rho = 62.4

    impfile = inpfile
    outfile = 'c'+impfile
    data = csv.reader(open(impfile, 'rb'), delimiter=",")
    dy, u, l, nm, d, wl, t, vert =[], [], [], [], [], [], [], []
    yrdty, year, month, day, hours, minutes, seconds, julday = [], [], [], [], [], [], [], []
    yrdty2,year2, month2, day2, hours2, minutes2, seconds2, julday2 = [], [], [], [], [], [], [], []
    yrdty3,year3, month3, day3, hours3, minutes3, seconds3, julday3 = [], [], [], [], [], [], [], []

    # read in bp data
    bpdata = bpdata
    bdata = csv.reader(open(bpdata, 'rb'), delimiter=",")
    v, d2, bp=[], [], []
    d3, SG33WDD, PW19S2, PW19M2, MXSWDD = [],[],[],[],[]

    etdata = edata

    #assign data in csv to arrays
    for row in data:
        u.append(row)

    for row in bdata:
        v.append(row)

    #pick well name, lat., long., and elevation data out of header of wl file
    well_name = u[0][1]
    lon = [float(u[5][1])]
    latt = [round(float(u[4][1]),3)]
    el = [round(float(u[10][1])/3.2808,3)]

    #import the bp data
    with open(bpdata, 'rb') as tot:
        csvreader1 = csv.reader(tot)
        for row in skip_first(csvreader1, 3):
            d2.append(row[2])
            bp.append(float(row[3]))

    #import the wl data
    with open(impfile, 'rb') as total:
        csvreader = csv.reader(total)
        for row in skip_first(csvreader, 62):
            dy.append(row[0])
            nm.append(row[1])

    #import supplemental earth tide data
    with open(etdata, 'rb') as tos:
        csvreader2 = csv.reader(tos)
        for row in skip_first(csvreader2,2):
            d3.append(row[5])
            SG33WDD.append(float(row[6]))
            PW19S2.append(row[7])
            PW19M2.append(row[8])
            MXSWDD.append(row[9])

    #import a smaller part of the wl data
    for i in range(len(dy)-numb,len(dy)):
        d.append(dy[i])
        wl.append(nm[i])

    #fill in last line of wl data
    wl[-1]=wl[-2]
    for i in range(len(wl)):
        if wl[i] == '':
            wl[i]=wl[i-1]

    #create a list of latitude, longitude, elevation, and gmt for tidal calculation
    lat = latt*len(d)
    longit = lon*len(d)
    elev = el*len(d)
    gmtt = [float(lagt)]*len(d)

    # define the various components of the date, represented by d
    # dates for wl data
    for i in range(len(d)):
        yrdty.append(time.strptime(d[i],"%Y-%m-%d %H:%M:%S"))
        year.append(int(yrdty[i].tm_year))
        month.append(int(yrdty[i].tm_mon))
        day.append(int(yrdty[i].tm_mday))
        hours.append(int(yrdty[i].tm_hour))
        minutes.append(int(yrdty[i].tm_min))
        seconds.append(int(0)) #yrdty[i].tm_sec
    # dates for bp data
    for i in range(len(d2)):
        yrdty2.append(time.strptime(d2[i],"%Y-%m-%d %H:%M:%S"))
        year2.append(int(yrdty2[i].tm_year))
        month2.append(int(yrdty2[i].tm_mon))
        day2.append(int(yrdty2[i].tm_mday))
        hours2.append(int(yrdty2[i].tm_hour))
        minutes2.append(int(yrdty2[i].tm_min))
        seconds2.append(int(0)) #yrdty2[i].tm_sec
    # dates for bp data
    for i in range(len(d3)):
        yrdty3.append(time.strptime(d3[i],"%m/%d/%Y %H:%M"))
        year3.append(int(yrdty3[i].tm_year))
        month3.append(int(yrdty3[i].tm_mon))
        day3.append(int(yrdty3[i].tm_mday))
        hours3.append(int(yrdty3[i].tm_hour))
        minutes3.append(int(yrdty3[i].tm_min))
        seconds3.append(int(0)) #yrdty2[i].tm_sec

    #julian day calculation
    def calc_jday(Y, M, D, h, m, s):
      # Y is year, M is month, D is day
      # h is hour, m is minute, s is second
      # returns decimal day (float)
      Months = [0, 31, 61, 92, 122, 153, 184, 214, 245, 275, 306, 337]
      if M < 3:
        Y = Y-1
        M = M+12
      JD = math.floor((Y+4712)/4.0)*1461 + ((Y+4712)%4)*365
      JD = JD + Months[M-3] + D
      JD = JD + (h + (m/60.0) + (s/3600.0)) / 24.0
      # corrections-
      # 59 accounts for shift of year from 1 Jan to 1 Mar
      # -13 accounts for shift between Julian and Gregorian calendars
      # -0.5 accounts for shift between noon and prev. midnight
      JD = JD + 59 - 13.5
      return JD

    # create a list of julian dates
    for i in range(len(d)):
        julday.append(calc_jday(year[i],month[i],day[i],hours[i],minutes[i],seconds[i]))

    for i in range(len(d2)):
        julday2.append(calc_jday(year2[i],month2[i],day2[i],hours2[i],minutes2[i],seconds2[i]))

    for i in range(len(d3)):
        julday3.append(calc_jday(year3[i],month3[i],day3[i],hours3[i],minutes3[i],seconds3[i]))

    #run tidal function
    for i in range(len(d)):
        t.append(tamura.tide(int(year[i]), int(month[i]), int(day[i]), int(hours[i]), int(minutes[i]), int(seconds[i]), float(longit[i]), float(lat[i]), float(elev[i]), 0.0, lagt)) #float(gmtt[i])

    vert, Grav_tide, WDD_tam, areal, potential, dilation = [], [], [], [], [], []

    #determine vertical strain from Agnew 2007
    #units are in sec squared, meaning results in mm
    # areal determine areal strain from Agnew 2007, units in mm
    #dilation from relationship defined using Harrison's code
    #WDD is used to recreate output from TSoft
    for i in range(len(t)):
        areal.append(t[i]*p*1E-5)
        potential.append(-318.49681664*t[i] - 0.50889238)
        WDD_tam.append(t[i]*(-.99362956469)-7.8749754)
        dilation.append(0.381611837*t[i] - 0.000609517)
        vert.append(t[i] * 1.692)
        Grav_tide.append(-1*t[i])

    #convert to excel date-time numeric format
    xls_date = []
    for i in range(len(d)):
        xls_date.append(float(julday[i])-2415018.5)

    xls_date2 = []
    for i in range(len(d2)):
        xls_date2.append(float(julday2[i])-2415018.5)

    xls_date3 = []
    for i in range(len(d3)):
        xls_date3.append(float(julday3[i])-2415018.5)

    t_start = xls_date[0]
    t_end = xls_date[-1]
    t_len = (len(xls_date))

    #align bp data with wl data
    t1 = numpy.linspace(t_start, t_end, t_len)
    bpint = numpy.interp(t1, xls_date2, bp)
    etint = numpy.interp(t1, xls_date3, SG33WDD)

    xprep, yprep, zprep = [], [], []
    #convert text from csv to float values
    for i in range(len(julday)):
        xprep.append(float(julday[i]))
        yprep.append(float(dilation[i]))
        zprep.append(float(wl[i]))

    #put data into numpy arrays for analysis
    xdata = numpy.array(xprep)
    ydata = numpy.array(yprep)
    zdata = numpy.array(zprep)
    bpdata = numpy.array(bpint)
    etdata = numpy.array(etint)
    bp = bpdata
    z = zdata
    y = ydata
    #    tempdata = numpy.array(tempint)
    #standardize julian day to start at zero

    x0data = xdata - xdata[0]

    wl_z = []
    mn = numpy.mean(z)
    std = numpy.std(z)
    for i in range(len(z)):
        wl_z.append((z[i]-mn)/std)

    bp_z = []
    mn = numpy.mean(bp)
    std = numpy.std(bp)
    for i in range(len(bp)):
        bp_z.append((bp[i]-mn)/std)

    t_z = []
    mn = numpy.mean(y)
    std = numpy.std(y)
    for i in range(len(y)):
        t_z.append((t[i]-mn)/std)

    dbp = []
    for i in range(len(bp)-1):
        dbp.append(bp[i]-bp[i+1])

    dwl = []
    for i in range(len(z)-1):
        dwl.append(z[i]-z[i+1])

    dt = []
    for i in range(len(y)-1):
        dt.append(y[i]-y[i+1])

    dbp.append(0)

    dwl.append(0)

    dt.append(0)


    ###########################################################################
    #
    ############################################################ Filter Signals
    #
    ###########################################################################
    ''' these filtered data are not necessary,
    but are good for graphical comparison '''
    ### define filtering function
    def filt(frq,tol,data):
        #define frequency tolerance range
        lowcut = (frq-frq*tol)
        highcut = (frq+frq*tol)
        #conduct fft
        ffta = fft.fft(data)
        bp2 = ffta.copy()
        fftb = fft.fftfreq(len(bp2))
        #zero every amplitude whose frequency lies outside the tolerance range of the frequency of interest
        #multiplying by spd converts the fft bin frequency to cycles per day
        for i in range(len(fftb)):
            #spd is samples per day (if hourly = 24)
            if (fftb[i]*spd)>highcut or (fftb[i]*spd)<lowcut:
                bp2[i]=0
        #conduct inverse fft to transform the filtered frequencies back into a time series
        crve = fft.ifft(bp2)
        yfilt = crve.real
        return yfilt
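    #quick self-test sketch (illustrative only, not part of the original workflow): band-pass a
    #synthetic two-tone signal around M2 and check that the 0.5 cpd tone is removed; note that the
    #filter keeps only positive-frequency bins, so the recovered amplitude is roughly halved
    t_chk = numpy.arange(0.0, 15.0, 1.0/spd)                    #15 days at spd samples per day
    sig_chk = numpy.cos(2*pi*M2*t_chk) + numpy.cos(2*pi*0.5*t_chk)
    m2_chk = filt(M2, tol, sig_chk)                             #should carry only M2-band energy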



    #filter tidal data
    yfilt_O1 = filt(O1,tol,ydata)
    yfilt_M2 = filt(M2,tol,ydata)
    #filter wl data
    zfilt_O1 = filt(O1,tol,zdata)
    zfilt_M2 = filt(M2,tol,zdata)

    zffta = abs(fft.fft(zdata))
    zfftb = abs(fft.fftfreq(len(zdata))*spd)



    def phasefind(A,frq):
        spectrum = fft.fft(A)
        freq = fft.fftfreq(len(spectrum))
        r = []
        #keep the spectral amplitude only where the frequency (in cycles per day) lies in the tolerance band around frq
        for i in range(len(freq)):
            #spd is samples per day (if hourly = 24)
            if (freq[i]*spd)>(frq-frq*tol) and (freq[i]*spd)<(frq+frq*tol):
                r.append(abs(spectrum[i]))
            else:
                r.append(0)
        #find the index of the dominant in-band component and return its phase in degrees
        p = max(enumerate(r), key=itemgetter(1))[0]
        pla = spectrum[p]
        T5 = cmath.phase(pla)*180/pi
        return T5

    yphsO1 = phasefind(ydata,O1)
    zphsO1 = phasefind(zdata,O1)
    phsO1 = zphsO1 - yphsO1
    yphsM2 = phasefind(ydata,M2)
    zphsM2 = phasefind(zdata,M2)
    phsM2 = zphsM2 - yphsM2
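    #note (added): the absolute FFT phase of a single record depends on the record start time and on
    #spectral leakage; only the difference between two series sampled identically at the same
    #frequency (phsO1, phsM2 above) is physically meaningful here, since the common bias largely cancels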


#    def phase_find(A,B,P):
#        period = P
#        tmax = len(xdata)*24
#        nsamples = len(A)
#        # calculate cross correlation of the two signals
#        t6 = numpy.linspace(0.0, tmax, nsamples, endpoint=False)
#        xcorr = numpy.correlate(A, B)
#        # The peak of the cross-correlation gives the shift between the two signals
#        # The xcorr array goes from -nsamples to nsamples
#        dt6 = numpy.linspace(-t6[-1], t6[-1], 2*nsamples-1)
#        recovered_time_shift = dt6[xcorr.argmax()]
#
#        # force the phase shift to be in [-pi:pi]
#        #recovered_phase_shift = 2*pi*(((0.5 + recovered_time_shift/(period*24)) % 1.0) - 0.5)
#        return recovered_time_shift
#
#
#    O1_ph= phase_find(ydata,zdata,P_O1)
#    M2_ph= phase_find(ydata,zdata,P_M2)




    ###########################################################################
    #
    ####################################################### Regression Analysis
    #
    ###########################################################################

    #define functions used for least squares fitting
    def f3(p, x):
        #a,b,c = p
        m = 2.0 * O1 * pi
        y = p[0] + p[1] * (numpy.cos(m*x)) + p[2] * (numpy.sin(m*x))
        return y

    def f4(p, x):
        #a,b,c = p
        m =2.0 * M2 * pi
        y  = p[0] + p[1] * (numpy.cos(m*x)) + p[2] * (numpy.sin(m*x))
        return y

    #define functions to minimize
    def err3(p,y,x):
        return y - f3(p,x)

    def err4(p,y,x):
        return y - f4(p,x)

    #conducts regression, then calculates amplitude and phase angle
    def lstsq(func,y,x):
        #define starting values with x0
        x0 = numpy.array([sum(y)/float(len(y)), 0.01, 0.01])
        fit, chks = optimization.leastsq(func, x0, args=(y, x))
        amp = math.sqrt((fit[1]**2)+(fit[2]**2))            #amplitude
        phi = numpy.arctan2(-1*fit[2], fit[1])*(180/pi)     #phase angle in degrees
        return amp,phi,fit
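    #sanity-check sketch (illustrative only, not part of the original analysis): for a synthetic
    #signal y = A*cos(m*x - phi) = A*cos(phi)*cos(m*x) + A*sin(phi)*sin(m*x), lstsq() should
    #return roughly A and phi
    x_chk = numpy.linspace(0.0, 10.0, 240)
    y_chk = 2.5*numpy.cos(2.0*O1*pi*x_chk - 0.7)
    amp_chk, phi_chk, fit_chk = lstsq(err3, y_chk, x_chk)
    #amp_chk should be close to 2.5 and abs(phi_chk) close to 0.7*180/pi (sign depends on convention)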

    #water level signal regression
    WLO1 = lstsq(err3,zdata,xdata)
    WLM2 = lstsq(err4,zdata,xdata)

    #tide signal regression
    TdO1 = lstsq(err3,ydata,xdata)
    TdM2 = lstsq(err4,ydata,xdata)

    #calculate phase shift
    phase_sft_O1 = WLO1[1] - TdO1[1]
    phase_sft_M2 = WLM2[1] - TdM2[1]

    delt_O1 = (phase_sft_O1/(O1*360))*24
    delt_M2 = (phase_sft_M2/(M2*360))*24

    #determine tidal potential Cutillo and Bredehoeft 2010 pg 5 eq 4
    f_O1 = math.sin(float(lat[1])*pi/180)*math.cos(float(lat[1])*pi/180)
    f_M2 = 0.5*math.cos(float(lat[1])*pi/180)**2

    A2_M2 = g_ft*Km*b_M2*f_M2
    A2_O1 = g_ft*Km*b_O1*f_O1

    #Calculate ratio of head change to change in potential
    dW2_M2 = A2_M2/(WLM2[0])
    dW2_O1 = A2_O1/(WLO1[0])

    #estimate specific storage Cutillo and Bredehoeft 2010
    def SS(rat):
        return 6.95690250E-10*rat

    Ss_M2 = SS(dW2_M2)
    Ss_O1 = SS(dW2_O1)

    def curv(Y,P,r):
        rc = (r/12.0)*(r/12.0)
        X = -1421.15/(0.215746 + Y) - 13.3401 - 0.000000143487*Y**4 - 9.58311E-16*Y**8*math.cos(0.9895 + Y + 1421.08/(0.215746 + Y) + 0.000000143487*Y**4)
        T = (X*rc)/P
        return T

    Trans_M2 = curv(phase_sft_M2,P_M2,r)
    Trans_O1 = curv(phase_sft_O1,P_O1,r)


    ###########################################################################
    #
    ############################################ Calculate BP Response Function
    #
    ###########################################################################

    # create lag matrix for regression
    bpmat = tools.lagmat(dbp, lag, original='in')
    etmat = tools.lagmat(dt, lag, original='in')
    #lamat combines lag matrices of bp and et
    lamat = numpy.column_stack([bpmat,etmat])
    #for i in range(len(etmat)):
    #    lagmat.append(bpmat[i]+etmat[i])
    #run least squares regression of dwl on the bp lag matrix
    sqrd = numpy.linalg.lstsq(bpmat,dwl,rcond=None)
    #determine lag coefficients of the combined lag matrix lamat
    sqrdlag = numpy.linalg.lstsq(lamat,dwl,rcond=None)

    wlls = sqrd[0]
    #lagls return the coefficients of the least squares of lamat
    lagls = sqrdlag[0]

    cumls = numpy.cumsum(wlls)
    #returns cumulative coefficients of et and bp (lamat)
    lagcumls =numpy.cumsum(lagls)

    ymod = numpy.dot(bpmat,wlls)
    lagmod = numpy.dot(lamat,lagls)

    #resid gives the residual of the bp
    resid=[]
    for i in range(len(dwl)):
        resid.append(dwl[i] - ymod[i])
    #alpha returns the lag coefficients associated with bp
    alpha = lagls[0:len(lagls)//2]
    alpha_cum = numpy.cumsum(alpha)
    #gamma returns the lag coefficients associated with ET
    gamma = lagls[len(lagls)//2:len(lagls)]
    gamma_cum = numpy.cumsum(gamma)
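    #illustrative sketch (not part of the original analysis): the response function above is a set of
    #distributed-lag regression coefficients; on synthetic data where dwl responds to dbp through a
    #known kernel, the same least-squares setup recovers that kernel
    rng_chk = numpy.random.RandomState(0)
    dbp_chk = rng_chk.randn(500)
    kern_chk = numpy.array([0.4, 0.2, 0.1])                          #assumed impulse response
    dwl_chk = numpy.convolve(dbp_chk, kern_chk)[:500]
    lags_chk = numpy.column_stack([numpy.concatenate([numpy.zeros(k), dbp_chk[:500-k]])
                                   for k in range(len(kern_chk))])
    coef_chk = numpy.linalg.lstsq(lags_chk, dwl_chk, rcond=None)[0]  #should be close to kern_chk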

    lag_time = []
    for i in range(len(xls_date)):
        lag_time.append((xls_date[i] - xls_date[0])*24)


    ######################################### determine slope of late time data
    lag_trim1 = lag_time[0:len(cumls)]
    lag_time_trim = lag_trim1[len(lag_trim1)-(len(lag_trim1)//2):len(lag_trim1)]
    alpha_trim = alpha_cum[len(lag_trim1)-(len(lag_trim1)//2):len(lag_trim1)]
    #calculate slope of late-time data
    lag_len = len(lag_time_trim)
    tran = numpy.array([lag_time_trim, numpy.ones(lag_len)])

    reg_late = numpy.linalg.lstsq(tran.T,alpha_trim,rcond=None)[0]
    late_line=[]
    for i in range(len(lag_trim1)):
        late_line.append(reg_late[0] * lag_trim1[i] + reg_late[1]) #regression line


    ######################################## determine slope of early time data
    lag_time_trim2 = lag_trim1[0:len(lag_trim1)-int(round((len(lag_trim1)/1.5),0))]
    alpha_trim2 = alpha_cum[0:len(lag_trim1)-int(round((len(lag_trim1)/1.5),0))]

    lag_len1 = len(lag_time_trim2)
    tran2 = numpy.array([lag_time_trim2, numpy.ones(lag_len1)])

    reg_early = numpy.linalg.lstsq(tran2.T,alpha_trim2,rcond=None)[0]
    early_line= []
    for i in range(len(lag_trim1)):
        early_line.append(reg_early[0] * lag_trim1[i] + reg_early[1]) #regression line

    #classify the aquifer response from the slope of the early-time data
    if reg_early[0] > 0.001:
        aquifer_type = 'borehole storage'
    elif reg_early[0] < -0.001:
        aquifer_type = 'unconfined conditions'
    else:
        aquifer_type = 'confined conditions'


    ###########################################################################
    #
    ################################################################ Make Plots
    #
    ###########################################################################
    fig_1_lab = well_name + ' bp response function'
    fig_2_lab = well_name + ' signal processing'

    plt.figure(fig_1_lab)
    plt.suptitle(fig_1_lab, x= 0.2, y=.99, fontsize='small')
    plt.subplot(2,1,1)
    #plt.plot(lag_time[0:len(cumls)],cumls, label='b.p. alone')
    plt.plot(lag_time[0:len(cumls)],alpha_cum,"o", label='b.p. when \n considering e.t.')
    # plt.plot(lag_time[0:len(cumls)],gamma_cum, label='e.t.')
    plt.plot(lag_trim1, late_line, 'r-', label='late reg.')
    plt.plot(lag_trim1, early_line, 'g-', label='early reg.')
    plt.xlabel('lag (hr)')
    plt.ylabel('cumulative response function')
    plt.legend(loc=4,fontsize='small')
    plt.subplot(2,1,2)
    plt.plot(lag_time,dwl, label='wl', lw=2)
    plt.plot(lag_time,ymod, label='wl modeled w bp')
    plt.plot(lag_time,lagmod, 'r--', label='wl modeled w bp&et')
    plt.legend(loc=4,fontsize='small')
    plt.xlim(0,lag)
    plt.ylabel('change (ft)')
    plt.xlabel('time (hrs)')
    plt.tight_layout()
    plt.savefig('l'+ os.path.splitext(impfile)[0]+'.pdf')


    plt.figure(fig_2_lab)
    plt.suptitle(fig_2_lab, x=0.2, fontsize='small')
    plt.title(os.path.splitext(impfile)[0])
    plt.subplot(4,1,1)
    plt.xcorr(yfilt_O1,zfilt_O1,maxlags=10)
    plt.ylim(-1.1,1.1)
    plt.tick_params(labelsize=8)
    plt.xlabel('lag (hrs)',fontsize='small')
    plt.ylabel('correlation',fontsize='small')
    plt.title('Cross Correl O1',fontsize='small')
    plt.subplot(4,1,2)
    plt.xcorr(yfilt_M2,zfilt_M2,maxlags=10)
    plt.ylim(-1.1,1.1)
    plt.tick_params(labelsize=8)
    plt.xlabel('lag (hrs)',fontsize='small')
    plt.ylabel('correlation',fontsize='small')
    plt.title('Cross Correl M2',fontsize='small')
    plt.subplot(4,1,3)
    plt.plot(zfftb,zffta)
    plt.tick_params(labelsize=8)
    plt.xlabel('frequency (cpd)',fontsize='small')
    plt.ylabel('amplitude')
    plt.title('WL fft',fontsize='small')
    plt.xlim(0,4)
    plt.ylim(0,30)
    plt.subplot(4,1,4)
    plt.plot(x0data,zdata, 'b')
    plt.tick_params(labelsize=8)
    plt.xlabel('julian days',fontsize='small')
    plt.ylabel('water level (ft)',fontsize='small')
    plt.twinx()
    plt.plot(x0data,f3(WLO1[2],x0data), 'r')
    plt.plot(x0data,f4(WLM2[2],x0data), 'g')
    plt.tick_params(labelsize=8)
    plt.xlim(0,10)
    plt.ylabel('tidal strain (ppb)',fontsize='small')
    plt.tick_params(labelsize=8)
    plt.tight_layout()
    plt.title('Regression Fit',fontsize='small')
    plt.savefig('f'+ os.path.splitext(impfile)[0]+'.pdf')
    plt.close()

    ###########################################################################
    #Write output to files
    ###########################################################################

    # create row of data for compiled output file info.csv
    myCSVrow = [os.path.splitext(impfile)[0],well_name, A2_O1, A2_M2, phase_sft_O1, phase_sft_M2, delt_O1,
                delt_M2, Trans_M2, Trans_O1, Ss_O1, Ss_M2, WLO1[1], TdO1[1], WLM2[1], TdM2[1],
                WLO1[0], TdO1[0], WLM2[0], TdM2[0], WLO1[2][1], TdO1[2][1], WLM2[2][1],
                TdM2[2][1], WLO1[2][2], TdO1[2][2], WLM2[2][2], TdM2[2][2], reg_late[1], reg_early[0], aquifer_type, phsO1, phsM2]
    # add data row to compiled output file
    compfile = open('info.csv', 'a')
    writer = csv.writer(compfile)
    writer.writerow(myCSVrow)
    compfile.close()


    #export tidal data to individual (well specific) output file
    with open(outfile, "wb") as f:
        filewriter = csv.writer(f, delimiter=',')
    #write header
        header = ['xl_time','date_time','V_ugal','vert_mm','areal_mm','WDD_tam','potential','dilation_ppb','wl_ft','dbp','dwl','resid','bp','Tsoft_SG23']
        filewriter.writerow(header)
        for row in range(0,1):
            for i in range(len(d)):
            #you can add more columns here
                filewriter.writerow([xls_date[i],d[i],Grav_tide[i],vert[i],areal[i],WDD_tam[i],potential[i],
                                     dilation[i],wl[i],dbp[i],dwl[i],resid[i],bp[i],etint[i]])
#read in the data
zeta = np.loadtxt('Zeta.txt')
#convert units to millimetres
zeta = zeta * 1000
#enter the central epoch and compute the astronomical arguments
P, Q, sigma, V, f, kappa, alpha, corr, mask, record = get_variables(middle_date)
#run the harmonic analysis to obtain the harmonic constants
H, g, S_0 = Tides.analyze(P, zeta, sigma, V, f, kappa, alpha, corr)
#hindcast the tide from the harmonic constants
zeta_predict = Tides.predict(-172, 172, sigma, V, f, H, g, S_0, mask, 1.)
#hindcast the tide minute by minute to determine the times and heights of high and low water
zeta_predict_minute = Tides.predict(-172*60, 172*60, sigma, V, f, H, g, S_0, mask, 1./60.)
#plot the hindcast curve and the observed curve on the same figure
show_diff(zeta, zeta_predict)
#correlation between the hindcast curve and the measurements; a value very close to 1 indicates the hindcast is sound
print('correlation coefficient: %.5f' % plt.xcorr(zeta, zeta_predict, maxlags=1)[1][1])
print('mean sea level: %.2f mm' % S_0)
print('constituents and their H and g:')
for k in record.keys():
    print('constituent: %s\t\tamplitude: %.2f mm\t\tphase lag: %.2f rad' % (record[k], H[k-1], g[k-1]))
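
#cross-check sketch (not in the original script): plt.xcorr does not remove the mean before
#correlating, so the lag-0 value above can differ from the Pearson coefficient; numpy.corrcoef
#gives the mean-removed value
r_check = np.corrcoef(zeta, zeta_predict)[0, 1]
print('Pearson correlation (mean removed): %.5f' % r_check)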

    
def print_max_min(data, time):
    ssum1 = 0.
    ssum2 = 0.
    num1 = 0
    num2 = 0
    t = dateutil.parser.parse(time)
    for i in range(1, data.size - 1):
        if data[i] > data[i - 1] and data[i] > data[i + 1]:
            print('high water: %s\tlevel: %d mm' % ((t + datetime.timedelta(minutes=i)).strftime('%c'), data[i]))
Beispiel #40
0
#%% Data analysis

# Plot GDP after normalization
plt.plot(data["GDP_csa"], label="GDP_csa")
# plt.plot(data["GDP_na"], label='GDP_na')
plt.title("Transformed GDP")
plt.ylabel("GDP")
plt.xlabel("Quarter")
plt.legend(loc="upper left")
# plt.yscale("log")
plt.show()

# Cross-correlation
column_names = ["lags"] + list(data.columns)
Crosscorrelationdf = pd.DataFrame(columns=column_names)
lags, _, _, _ = plt.xcorr(data["GDP_csa"], data["GDP_csa"])
Crosscorrelationdf["lags"] = lags
for col in data.columns:
    standardisedGDP = np.array((data["GDP_csa"] - data["GDP_csa"].mean()))
    standardisedCol = np.array(data[col] - data[col].mean())
    _, corr, _, _ = plt.xcorr(standardisedGDP, standardisedCol)
    Crosscorrelationdf[col] = corr
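
# sanity-check sketch (not part of the original analysis): for mean-centered inputs the lag-0
# value from plt.xcorr(..., normed=True) should match the Pearson correlation of the two series
lag0_check = corr[len(lags) // 2]                        # middle entry of the lag axis is lag 0
pearson_check = np.corrcoef(standardisedGDP, standardisedCol)[0, 1]
print("lag-0 xcorr vs Pearson r (last column):", lag0_check, pearson_check)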

#%% GDP per employed person

GDPperEmployed = Nillesdata["GDP_csa"] / Nillesdata["PAO"]
column_names = ["Quarter", "PeGDP"]
GDPperEmployeddf = pd.DataFrame(columns=column_names)
GDPperEmployeddf["Quarter"] = Quarter
GDPperEmployeddf["PeGDP"] = GDPperEmployed
Beispiel #41
0
        tsaplots.plot_acf(data, lags=lags, alpha=alpha)
        pyplot.show()

    #partial auto correlation
    elif op == "pacf":
        lags = config.getIntConfig("pacf.lags")[0]
        alpha = config.getFloatConfig("pacf.alpha")[0]
        tsaplots.plot_pacf(data, lags=lags, alpha=alpha)
        pyplot.show()

    #cross correlation
    elif op == "ccf":
        dataSec = loadData(config, True)
        normed = config.getBooleanConfig("ccf.normed")[0]
        maxlags = config.getIntConfig("ccf.maxlags")[0]
        pyplot.xcorr(data, dataSec, normed=normed, maxlags=maxlags)
        pyplot.show()

    #stationarity test
    elif op == "adf":
        regression = config.getStringConfig("adf.regression")[0]
        autolag = config.getStringConfig("adf.autolag")[0]
        result = adfuller(data, regression=regression, autolag=autolag)
        print(f'ADF Statistic: {result[0]}')
        print(f'p value: {result[1]}')
        print(f'num lags: {result[2]}')
        print(f'num observation for regression: {result[3]}')
        print('Critical Values:')
        for key, value in result[4].items():
            print(f'   {key}, {value}')
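        #interpretation sketch (not part of the original): reject the unit-root null when the ADF
        #statistic is below the chosen critical value, here the 5% level from result[4]
        if result[0] < result[4]['5%']:
            print('ADF: series appears stationary at the 5% level')
        else:
            print('ADF: cannot reject a unit root at the 5% level')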
Beispiel #42
0
plt.plot(lag_time, lagmod, 'r--', label='wl modeled w bp&et')
plt.legend(loc=4, fontsize='small')
plt.xlim(0, lag)
plt.ylabel('change (ft)')
plt.xlabel('time (hrs)')
plt.tight_layout()
pp.savefig()
plt.close()

#figure 2
fig_2 = well_name + ' signal processing'
plt.figure(fig_2)
plt.suptitle(fig_2, x=0.2, fontsize='small')
plt.title(os.path.splitext(wlfile)[0])
plt.subplot(2, 1, 1)
plt.xcorr(dl_O1[1], wl_O1[1], maxlags=50)
plt.ylim(-1.1, 1.1)
plt.tick_params(which='both', labelsize=8)
plt.xlabel('lag (hrs)', fontsize='small')
plt.ylabel('correl', fontsize='small')
plt.title('Cross Correl O1', fontsize='small')
plt.subplot(2, 1, 2)
plt.xcorr(dl_M2[1], wl_M2[1], maxlags=50)
plt.ylim(-1.1, 1.1)
plt.tick_params(which='both', labelsize=8)
plt.xlabel('lag (hrs)', fontsize='small')
plt.ylabel('correl', fontsize='small')
plt.title('Cross Correl M2', fontsize='small')
plt.tight_layout()
pp.savefig()
plt.close()
Beispiel #43
0
                                                          drop_nan=False)

    label = np.array(matrix['SSHA_105'])
    #    label = ml_utilities.matrix_min_max_rescale(label, 1, -1, axis=0)
    matrix = matrix.drop(columns=['SSHA_35', 'SSHA_71', 'SSHA_105'])
    #    matrix = ml_utilities.matrix_min_max_rescale(matrix, 0.5, -0.5, axis=0)
    matrix = np.array(matrix)
    matrix = ml_utilities.my_standardizer(matrix, matrix)  # standardize
    label = ml_utilities.my_standardizer(np.expand_dims(label, axis=1),
                                         np.expand_dims(label,
                                                        axis=1))  # standardize
    matrix = matrix.squeeze()
    label = label.squeeze()
    _, ccorr, _, _ = plt.xcorr(label,
                               matrix,
                               usevlines=True,
                               maxlags=400,
                               normed=True,
                               lw=2)
    ccorr_max = np.nanmax(np.abs(ccorr))
    #    ccorr_min = ccorr.min()
    #    print(np.isnan(ccorr))
    if (ccorr_max < 0.5):
        continue

#    fig = plt.figure(figsize=(15,10))
#    plt.plot(distance, label, distance, matrix)
#    plt.legend(['SSHA', 'SST'])
#    plt.title(filename[:-4], fontsize=23)
#    fig.savefig(os.path.join(r"C:\Users\vlachos\Desktop\SSTlevel4".replace('\\','\\'), filename +'.png'), dpi=300)

    d.append(len(distance))
Beispiel #44
0
import pandas
from matplotlib import pyplot

df = pandas.read_csv('data/ground_matches.csv')
df['team_last10_avg_diff'].fillna(0,inplace=True)
df['team_last10_avg_diff'] = df['team_last10_avg_diff'].astype(int)
pyplot.xcorr(df['match_diff'], df['team_last10_avg_diff'], maxlags=99)
pyplot.show()
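
# sketch (added, not in the original): pyplot.xcorr does not subtract the mean, so for a
# correlation-style reading it can help to de-mean both series first (maxlags must stay below len(df))
x = (df['match_diff'] - df['match_diff'].mean()).values
y = (df['team_last10_avg_diff'] - df['team_last10_avg_diff'].mean()).values
pyplot.xcorr(x, y, maxlags=99)
pyplot.show()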