def load_data_emd(trainNum, testNum, startNum, data):
    """Slice, standardise and EMD-decompose a series, then build one
    supervised dataset per IMF.

    Returns a list with one entry per IMF: [trainX, trainY, testX, testY].
    Side effects: reads the module global ``ahead_num`` (lag window size)
    and rebinds the module global ``scaler_target`` with the fitted scaler
    so predictions can be inverse-transformed later.
    """
    print('EMD_data loading.')
    global ahead_num
    # all_data_checked = data
    targetData = data
    # Select the prediction window: trainNum + testNum samples after startNum.
    targetData = targetData[startNum + 1: startNum + trainNum + testNum + 1]
    targetData = np.array(targetData).reshape(-1, 1)
    # Standardise; each feature is scaled independently.
    global scaler_target
    scaler_target = StandardScaler(copy=True, with_mean=True, with_std=True)
    targetData = scaler_target.fit_transform(targetData)
    decomposer = EMD(targetData)
    imfs = decomposer.decompose()
    # plot_imfs(targetData, imfs)
    data_decomposed = imfs.tolist()
    # Reshape every IMF into a column vector before windowing.
    for h1 in range(len(data_decomposed)):
        data_decomposed[h1] = np.array(data_decomposed[h1]).reshape(-1, 1)
    # Replace each IMF by its train/test split produced by create_data.
    for h2 in range(len(data_decomposed)):
        trainX, trainY, testX, testY = create_data(data_decomposed[h2], trainNum, ahead_num)
        dataset_imf = [trainX, trainY, testX, testY]
        data_decomposed[h2] = dataset_imf
    print('load_data complete.\n')
    return data_decomposed
def emd_plot(data):
    """Decompose *data* with EMD, plot the IMFs, and return them."""
    decomposer = EMD(data)
    modes = decomposer.decompose()
    # ipdb.set_trace()
    plot_imfs(data, modes)
    plt.legend('EMD')
    return modes
def test_imfs_total_no_error(self):
    """Check if the sum of the IMFs is sufficiently close to the input signal."""
    combined = self.trend + self.mode1 + self.mode2
    imfs = EMD(combined).decompose()
    assert_allclose(imfs.sum(axis=0), combined)
def test_residue(self):
    """Test the residue of the emd output."""
    combined = self.trend + self.mode1 + self.mode2
    imfs = EMD(combined, t=self.ts).decompose()
    # The residue (last row) should be close to monotonic: at most
    # two local extrema in total.
    residue = imfs[-1, :]
    n_maxima = argrelmax(residue)[0].shape[0]
    n_minima = argrelmin(residue)[0].shape[0]
    self.assertTrue(max(n_maxima, n_minima) <= 2)
def test_monotonicity_of_trend(self):
    """Check if the trend is monotonic."""
    combined = self.trend + self.mode1 + self.mode2
    imfs = EMD(combined).decompose()
    # Everything from the fourth IMF onward is treated as the trend
    # and summed back together before comparison.
    recovered = imfs[3:, :].sum(axis=0)
    assert_allclose(self.trend, recovered)
def hilbert_huang(self):
    """Run EMD on every series in ``self.reshaped_x`` and stack the IMFs."""
    collected = []
    for row in self.reshaped_x:
        for series in row:
            collected.append(EMD(series).decompose())
    return np.array(collected)
def get_data(l, lag=3):
    """Build supervised samples from the EMD of *l* (default lag 3, 1-step ahead).

    The last element of *l* is held out of the decomposition and serves as a
    target.  Each sample has shape (lag, m + 1): ``lag`` lagged time steps,
    each holding the m IMFs plus the residue at that step.
    """
    # Decompose everything except the final (target-only) point.
    modes = EMD(l[:-1]).decompose()
    # Rows = time steps, columns = IMFs + residue; window into lagged samples.
    x = seq_tf_matrix(modes.T, lag)
    # Targets: the one-step-ahead values aligned with the samples.
    y = l[-len(x):]
    return x, y
def EEMD(sample, num_iterations):
    """Ensemble Empirical Mode Decomposition of *sample*.

    Runs EMD ``num_iterations`` times on the signal plus fresh white noise,
    grouping decompositions by their IMF count, and averages each group.

    Returns a dict mapping IMF-count -> averaged IMF matrix.

    BUG FIX: the original divided each accumulated group by ``key`` (the
    number of IMFs) instead of by the number of trials that produced that
    group, so the "average" was wrong whenever the two differed.
    """
    imf_sums = {}
    imf_counts = {}
    for _ in range(num_iterations):
        white_noise = generateWhiteNoise(len(sample))
        noisy = white_noise + sample
        imfX = EMD(noisy, maxiter=10000).decompose()
        k = imfX.shape[0]
        try:
            imf_sums[k] += imfX
            imf_counts[k] += 1
        except KeyError:
            imf_sums[k] = imfX
            imf_counts[k] = 1
    # Average each group over the trials that contributed to it.
    for k in imf_sums:
        imf_sums[k] /= imf_counts[k]
    return imf_sums
def EMD_data_preparation(csv_folder,samplenumber,train_list):
    """Read comma-separated training rows, EMD-decompose each signal, and
    write the element-wise sum of IMF1..IMF3 plus the disease label to
    Ensembled_train.csv (Python 2 code)."""
    ########### Trining and Test Data Spliting ######################
    Ensembled_train = open(csv_folder+'Ensembled_train.csv', 'w')
    Total_data = 0
    #Training data prepare
    F = open(train_list,'r')
    line = F.readline()
    while line:
        Original_signal = []
        splitted = line.split(',')
        # First `samplenumber` fields are the signal samples.
        for h in range(0,samplenumber):
            Original_signal.append(float(splitted[h]))
        # Last field is the label; [:-1] strips the trailing newline.
        disease = splitted[-1][:-1]
        Original_signal = np.asarray(Original_signal)
        try:
            decomposer = EMD(Original_signal,n_imfs=3,maxiter=3000)
            imfs = decomposer.decompose()
            ensembled_data = []
            # "Ensembled" signal = per-sample sum of the first three IMFs.
            for h in range(0,samplenumber):
                ensembled_data.append(imfs[0][h]+imfs[1][h]+imfs[2][h])
            Total_data = Total_data+1
            # Serialise as 8-decimal CSV followed by the label.
            string = str(float("{0:.8f}".format(ensembled_data[0])))
            for h in range(1,samplenumber):
                string = string +','+str(float("{0:.8f}".format(ensembled_data[h])))
            string = string+','+disease+'\n'
            Ensembled_train.write(string)
            print 'Train Data = ',Total_data,'---Disease = ',disease
            line = F.readline()
        except:
            # NOTE(review): bare except — a decompose failure and a write
            # failure are indistinguishable here; message says "Write" only.
            print 'Could not Write'
            line = F.readline()
    Ensembled_train.close()
    #Ensembled_test.close()
    F.close()
def optIMFPrediction():
    """Grid-search the SVR regularisation constant C for every IMF.

    Loads 'table_bac.csv', min-max scales the close-price column, decomposes
    it with EMD, and for each IMF fits GridSearchCV(SVR) on rolling windows,
    printing the best parameters found.  Returns None.

    Cleanup: removed the unused locals from the original (``svrlist``,
    ``predYVals`` and a ``TimeSeriesSplit`` that was constructed but never
    used — cross-validation is done with the KFold below).
    """
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    close_prices = df[5].values[:]
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    kf = KFold(n_splits=10, shuffle=True)
    for i in range(imf.shape[0]):
        x, y = rollingWindows(imf[i], 500, 0, 3000)
        svr = svm.SVR(cache_size=1000)
        # Log-spaced C grid; all other SVR hyper-parameters left at defaults.
        parameters = {
            'C': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
        }
        reg = GridSearchCV(svr, parameters, cv=kf, n_jobs=-1)
        reg.fit(x, y)
        print(reg.best_params_)
    return
def emd(x, ts, n_imfs):
    """Wrap pyhht's EMD: return an (n_imfs + 1, len(x)) array holding up to
    two IMFs plus a residue row.

    Row 0 and row 1 are the first two IMFs when available; the last row is
    the residue x - sum(rows).  ``ts`` is accepted for interface
    compatibility but not used by this implementation.

    BUG FIX: the original did ``x.dtype = 'float64'``, which *reinterprets*
    the underlying buffer bytes (garbage values for non-float64 input)
    instead of converting values.  ``astype`` performs a real cast.
    """
    x = x.astype('float64')
    imfs = np.zeros((n_imfs + 1, x.shape[0]))
    decomposer = EMD(x)
    allimfs = decomposer.decompose()
    if len(allimfs) > 0:
        imfs[0, :] = allimfs[0]
    else:
        # No IMFs extracted: the whole signal goes into row 0.
        imfs[0, :] = x - imfs.sum(0)
    if len(allimfs) > 1:
        imfs[1, :] = allimfs[1]
    # (row 1 is already zero when fewer than two IMFs exist)
    imfs[-1, :] = x - imfs.sum(0)
    return imfs
def testWithIMFPrediction():
    """EMD + per-IMF SVR price-prediction experiment.

    Decomposes scaled close prices, trains an SVR per selected IMF on rolling
    windows, reconstructs a price prediction from the IMF predictions, and
    prints MSEs plus elapsed wall time.
    """
    t = time.time()
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    #plt.show()
    close_prices = df[5].values[:]
    print(len(close_prices))
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    plot_imfs(close_prices, imf)
    plt.plot(hilbert(imf, axis=0).T)
    plt.show()
    svrlist = []
    predYVals = np.matrix([])
    for i in range(7, 8):
        x, y = rollingWindows(imf[i], 500, 0, 2500)
        if i == 7:
            svr = svm.SVR(C=0.1, cache_size=4000)
        else:
            # BUG FIX: the keyword was lowercase ``c=10``, which raises
            # TypeError in scikit-learn — the regularisation parameter is C.
            svr = svm.SVR(C=10, cache_size=4000)
        svr.fit(x, y)
        svrlist.append(svr)
        testX, testY = rollingWindows(imf[i], 500, 3040, 3400)
        predY = np.matrix(svr.predict(testX)).T
        print(predY.shape)
        try:
            predYVals = np.concatenate([predYVals, predY], axis=1)
        except ValueError:
            # First iteration: the empty matrix cannot be concatenated.
            predYVals = np.matrix(predY)
    # Map IMF values back to prices with a second SVR.
    svr = svm.SVR()
    svr.fit(imf[7:8, 0:3000].T, close_prices[0:3000])
    predPrices = svr.predict(predYVals)
    print(mean_squared_error(close_prices[3540:3900], predPrices))
    # Baseline: naive lag-1 prediction.
    print(mean_squared_error(close_prices[3540:3900], close_prices[3539:3899]))
    print(time.time() - t)
sampling_period=1 / 25600) vmax = np.max(np.abs(coef3)) ax0 = ax[0].pcolormesh(t, freqs1, np.abs(coef1), cmap='jet', vmax=vmax) ax1 = ax[1].pcolormesh(t, freqs2, np.abs(coef2), cmap='jet', vmax=vmax) ax2 = ax[2].pcolormesh(t, freqs3, np.abs(coef3), cmap='jet', vmax=vmax) fig.colorbar(ax2, ax=[ax[0], ax[1], ax[2]]) from pyhht.emd import EMD times = [] fre = [] t = np.arange(0, 0.1, 1 / 25600) f = np.arange(1, 1281) * 10 decomposer = EMD(one_data[200, :, 0]) imfs = decomposer.decompose() temp_fft_data = np.fft.fft(imfs, axis=1) / imfs.shape[1] temp_fft_data = temp_fft_data[:, 1:1281] temp_fft_data = np.abs(temp_fft_data) for i in range(5): plt.subplot(5, 2, i * 2) plt.plot(t, imfs[i, :]) plt.subplot(5, 2, i * 2 + 1) plt.plot(f, temp_fft_data[i, :]) times = [] fre = [] decomposer = EMD(one_data[2200, :, 0]) imfs = decomposer.decompose()
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):
    """Walk a PTB patient list, EMD-decompose windows of the summed 15-lead
    ECG, and plot one decomposition (signal, IMF1..IMF6, residual) per window
    to 'PTB_EMD.eps' (Python 2 code).

    filepath       -- root folder of the PTB records
    patient_data   -- text file with one 'patient_folder/record' line per record
    samplenumber   -- samples per analysis window
    number_of_IMFs -- expected IMF count; a window is kept only when
                      len(imfs) == number_of_IMFs + 1 (IMFs + residue)
    """
    # Diagnosis groups collapsed into a single label each.
    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    ecg_lead = [
        'i', 'ii', 'iii', 'avr', 'avl', 'avf', 'v1', 'v2', 'v3', 'v4', 'v5',
        'v6', 'vx', 'vy', 'vz'
    ]
    f = open(patient_data)
    line = f.readline()
    disease_array = []
    while line:
        splitted = line.split('/')
        file_name = str(splitted[1][0:8])
        patient_folder = str(splitted[0])
        total_path = filepath + patient_folder + '/' + file_name
        print patient_folder, '---', file_name,
        #print total_path
        try:
            signal, ecgrecord = wfdb.rdsamp(total_path)
            record = wfdb.rdsamp(total_path)
            # Diagnosis string lives in comment 4 after a fixed 22-char prefix.
            print ecgrecord['comments'][4][22:]
            signal_length = len(signal)
            # Number of whole non-overlapping windows in this record.
            repetition = int(math.floor(signal_length / samplenumber))
            if not ecgrecord['comments'][4][22:] == 'n/a':
                disease = ecgrecord['comments'][4][22:]
                if disease in miscle:
                    disease = "Miscellaneous"
                elif disease in cardiom:
                    disease = "Cardiomyopathy"
                if disease not in disease_array:
                    disease_array.append(disease)
                samplelength = 0
                undecomposed = 0
                # Use the first 70% of the windows (training share).
                stop = int(math.ceil(repetition * 0.7))
                ########### Trining and Test Data Spliting ######################
                #Training data prepare
                for j in range(0, stop):
                    write_signal = []
                    # Sum all 15 ECG leads sample-by-sample into one signal.
                    for sample in range(samplelength, samplelength + samplenumber):
                        ecg_signal = 0
                        for i1 in range(0, 15):
                            ecg_signal = ecg_signal + signal[sample][i1]
                        write_signal.append(ecg_signal)
                    EMD_signal = np.asarray(write_signal)
                    try:
                        decomposer = EMD(EMD_signal, n_imfs=number_of_IMFs, maxiter=3000)
                        imfs = decomposer.decompose()
                        #print len(imfs)
                        # Per-IMF sample buffers used for plotting below.
                        str1 = []
                        str2 = []
                        str3 = []
                        str4 = []
                        str5 = []
                        str6 = []
                        str1.append(imfs[0][0])
                        str2.append(imfs[1][0])
                        if (len(imfs) == number_of_IMFs + 1):
                            for h in range(1, samplenumber):
                                str1.append(imfs[0][h])
                                str2.append(imfs[1][h])
                            if number_of_IMFs >= 3:
                                str3.append(imfs[2][0])
                                for h in range(1, samplenumber):
                                    str3.append(imfs[2][h])
                            if number_of_IMFs >= 4:
                                str4.append(imfs[3][0])
                                for h in range(1, samplenumber):
                                    str4.append(imfs[3][h])
                            if number_of_IMFs >= 5:
                                str5.append(imfs[4][0])
                                for h in range(1, samplenumber):
                                    str5.append(imfs[4][h])
                            if number_of_IMFs == 6:
                                str6.append(imfs[5][0])
                                for h in range(1, samplenumber):
                                    str6.append(imfs[5][h])
                            # Residue row (index 6 assumes 6 IMFs + residue).
                            res = []
                            res.append(imfs[6][0])
                            for h in range(1, samplenumber):
                                res.append(imfs[6][h])
                            ### Plot data
                            fig = plt.figure(figsize=(25, 15))
                            plt.subplot(8, 1, 1)
                            plt.plot(EMD_signal)
                            plt.ylabel('Signal', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 2)
                            plt.plot(str1)
                            plt.ylabel('IMF1', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 3)
                            plt.plot(str2)
                            plt.ylabel('IMF2', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 4)
                            plt.plot(str3)
                            plt.ylabel('IMF3', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 5)
                            plt.plot(str4)
                            plt.ylabel('IMF4', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 6)
                            plt.plot(str5)
                            plt.ylabel('IMF5', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 7)
                            plt.plot(str6)
                            plt.ylabel('IMF6', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 8)
                            plt.plot(res)
                            plt.ylabel('Residual', rotation=0, horizontalalignment='right', fontsize=25)
                            plt.xlabel('Sample Number', fontsize=30)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            fig.tight_layout()
                            plt.savefig('PTB_EMD.eps', format='eps', dpi=6000)
                            plt.show()
                        else:
                            print('IMF Number do not match')
                            undecomposed = undecomposed + 1
                        samplelength = samplelength + samplenumber
                    except:
                        # Window could not be decomposed; skip to the next one.
                        print 'Could not be decomposed'
                        samplelength = samplelength + samplenumber
            line = f.readline()
        except:
            # Record could not be read; log and continue with the next line.
            problem = patient_folder + '/' + file_name + '\n'
            line = f.readline()
            print sys.exc_info(), '\n'
    f.close()
    problem_data.close()
    print disease_array
#plt.plot(df[3].values[:]) close_prices = df[5].values[:] low_prices = df[4].values[:] high_prices = df[3].values[:] encodings = np.array([ longShortEncoding(i, close_prices, high_prices, low_prices, 5, 0.05) for i in range(3600) ]) weights = sampleWeightsByUniqueness(encodings) print(weights) print(encodings[:, 0]) print(encodings.shape) s = minmax_scale(close_prices) emd = EMD(s, maxiter=3000) imf = emd.decompose() plot_imfs(s, imf) predYVals = [] for i in range(7, imf.shape[0]): x, y = rollingWindows(imf[i], 30, 0, 3000) nn = KNeighborsRegressor(n_neighbors=4) nn.fit(x, y) x, y = rollingWindows(imf[i], 30, 3030, 3400) predYNN = nn.predict(x) print(y) print(predYNN) predYVals.append(predYNN)
from pyhht.emd import EMD
import pandas as pd
import numpy as np
import time

# BUG FIX: time.clock() was deprecated in Python 3.3 and removed in 3.8;
# perf_counter() is the documented replacement for wall-clock timing.
start = time.perf_counter()
from pyhht.visualization import plot_imfs

# Load the first column of the input CSV as the series to decompose.
dta = pd.read_csv('C:/Users/dw/Desktop/Data1.csv')
seris = np.array(dta.iloc[:, 0])
decomposer = EMD(seris)
imfs = decomposer.decompose()
plot_imfs(seris, imfs)
# Stack the IMFs with the original series and persist as CSV (one column
# per IMF, last column = original series).
arr = np.vstack((imfs, seris))
dataframe = pd.DataFrame(arr.T)
dataframe.to_csv("C:/Users/dw/Desktop/temp.csv")
end = time.perf_counter()
print("final is in ", end - start)
os.makedirs('../result/'+name_data+'/real_result') df = pd.read_csv(path, encoding='gbk') # 读取数据 df_data = df[['Date', 'Close']].set_index('Date').iloc[::-1] # 把数据按日期排列,日期向下递增, # df_data = df_data['2012-1-4':'2016-12-30'] # 上证 # df_data = df_data['2007-1-3':'2011-12-30'] # sp500 df_data = df_data['2012-5-22':'2014-9-9'] # hs300 沪深300股指期货 文献中没给 经过对比应该是这个区间 df_data['Close'] = df_data['Close'].astype('float64') data = np.array(df_data) data = np.reshape(data, (len(data),)) # 转换成(sample,)np.array data_tf = data_trans(data) # 数据变换 # data_tf = data_tf[-data_use_num:] # 取最后data_use_num个数据实验 # 分解序列 decomposer = EMD(data_tf) imfs = decomposer.decompose() def multi_emd_ann(lag=3, num_trial=20, hidden=128, epochs=20): pre_data_tf_result = pd.DataFrame() # 百分比结果 real_result = pd.DataFrame() # 预测重构值 time_ = [] # 时间 mape, mae, mse, rmse = [], [], [], [] for j in range(num_trial): pr = None start_time = time.time() for i in range(len(imfs)): d = seq_tf_matrix(imfs[i], n=lag+1) x = d[:, :-1]
"""
Created on Wed Apr 10 17:57:02 2019

@author: Administrator
"""
from pyhht.emd import EMD
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyhht.visualization import plot_imfs

# Load the price data; file is newest-first, hence the reversal.
#dataset = pd.read_csv('data_day.csv')
stock_dir = '../dataset/AAPL.csv'
dataset = pd.read_csv(open(stock_dir), header=0)
dataset = dataset[::-1]

# BUG FIX: the original looped `for col in dataset.columns: dataset =
# dataset['Open']`, which turns the frame into a Series on the first pass
# and then raises (or misbehaves) on every later pass.  Only the 'Open'
# column is ever decomposed, so select it once.
col = 'Open'
dataset = dataset[col]
data = dataset.values
s = data.ravel()

# EMD decomposition.
decomposer = EMD(s)
IMF = decomposer.decompose()
print(IMF.shape)
imf_data = pd.DataFrame(IMF.T)
imf_data.to_csv('../dataset/emd/emd_AAPL_' + str(col) + '.csv')

# Plot the decomposition.
plot_imfs(s, IMF)
def hht_marginal_spectrum(self, dataset, params):
    """Compute (or load from cache) the Hilbert marginal spectrum per bearing.

    Results are cached through ``dataset.save_processed_data`` /
    ``dataset.load_processed_data``; only bearings without a cached result
    are recomputed.  Returns a dict keyed by bearing id (str) holding
    [frequency bins, per-file marginal spectra, per-file binned envelopes].
    """
    # Setting data_path and checking if it's needed to compute this function for more bearings.
    processed_data_path = 'hht_marginal_spectrum/hht_marginal_spectrum'
    bearings_marginal_spectrum = dataset.load_processed_data(
        dataset, processed_data_path)
    bearings_not_processed = params['bearings']
    if bearings_marginal_spectrum[0]:
        bearings_marginal_spectrum = bearings_marginal_spectrum[1]
        bearings_processed = list(
            map(int, list(bearings_marginal_spectrum.keys())))
        bearings_not_processed = [
            x for x in params['bearings'] if x not in bearings_processed
        ]
        if bearings_not_processed == []:
            return bearings_marginal_spectrum
    # If can't find any saved file.
    else:
        bearings_marginal_spectrum = {}
    for current_bearing in bearings_not_processed:
        imfs_files = []
        bearing_marginal_spectrum = []
        bearing_files = dataset.bearings_files[str(current_bearing)]
        # Calculating IMFs for each data file.
        for bearing_file in bearing_files:
            data = bearing_file[params['vibration_signal']].values
            decomposer = EMD(data)
            imfs_files.append(decomposer.decompose())
        # Getting the frequency bins.
        # NOTE(review): N is taken from the *last* file's data — assumes all
        # files have the same length; confirm against the dataset loader.
        N = len(data)
        fs = params['sampling_frequency']
        freq_bins_step = fs / N
        freq_bins = np.fft.fftfreq(N)[0:N // 2] * fs
        # Timestep = 1.
        # Calculating Hilbert transform for each IMF.
        imfs_ht_files = []
        for imfs_file in imfs_files:
            imfs_ht_files.append(hilbert(imfs_file))
        # Calculating instantaneous frequency of each data.
        imfs_freqs_files = []
        for imfs_ht_file in imfs_ht_files:
            imfs_freqs_file = []
            for imf_ht_file in imfs_ht_file:
                imfs_freqs_file.append(
                    pyhht.utils.inst_freq(imf_ht_file)[0] * fs
                )  # [0] to select the frequencies. * fs because the inst_freq return normalized freqs.
            imfs_freqs_files.append(imfs_freqs_file)
        # Calculating absolute value and scaling by 1/N factor.
        N = len(imfs_ht_file[0])
        imfs_envelope_files = np.abs(imfs_ht_files) / N
        # Putting frequencies into the frequency bins and computing Hilbert Marginal Spectrum.
        imfs_envelope_files_bins = []
        for imfs_freqs_file, imfs_envelope_file in zip(
                imfs_freqs_files, imfs_envelope_files):
            imfs_envelope_file_bins = []
            for imf_freqs_file, imf_envelope_file in zip(
                    imfs_freqs_file, imfs_envelope_file):
                imfs_envelope_file_ = np.zeros(N // 2)
                # Map each instantaneous frequency onto its bin index.
                bin_index = [
                    int(freq // freq_bins_step) for freq in imf_freqs_file
                ]
                # Accumulate envelope amplitude into the matching bin.
                for index, abs_val in zip(bin_index, imf_envelope_file):
                    imfs_envelope_file_[index] += abs_val
                imfs_envelope_file_bins.append(imfs_envelope_file_)
            imfs_envelope_files_bins.append(imfs_envelope_file_bins)
        # Summing Hilbert Marginal Spectrum of [0 : params['imfs_qty]] imfs.
        for imfs_envelope_file_bins in imfs_envelope_files_bins:
            bearing_marginal_spectrum.append([
                sum(x) for x in zip(
                    *imfs_envelope_file_bins[0:params['imfs_qty']])
            ])
        # Saving frequencies, marginal spectrum and hilbert spectrum.
        # NOTE(review): the cache is rewritten after every bearing —
        # presumably a progress checkpoint; confirm this is intended.
        bearings_marginal_spectrum[str(current_bearing)] = [
            freq_bins, bearing_marginal_spectrum, imfs_envelope_files_bins
        ]
        dataset.save_processed_data(bearings_marginal_spectrum,
                                    processed_data_path)
    return bearings_marginal_spectrum
def emd(self):
    """Return the IMFs of the mean-removed waveform as a list of lists."""
    raw = np.array(self.get_waveform(), dtype=float)
    # Remove the constant offset before decomposing.
    detrended = signal.detrend(raw, type='constant')
    return EMD(detrended).decompose().tolist()
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):
    """For each record listed in *patient_data*, sum the 12 ECG leads per
    sample, EMD-decompose windows of the summed signal, and plot original vs.
    "modified" (IMF1+IMF2+IMF3) signal to 'modified_ECG_Petersburg.eps'
    (Python 2 code)."""
    # Diagnosis groups (kept for parity with sibling scripts; unused below).
    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    ecg_lead = [
        'i', 'ii', 'iii', 'avr', 'avl', 'avf', 'v1', 'v2', 'v3', 'v4', 'v5',
        'v6', 'vx', 'vy', 'vz'
    ]
    f = open(patient_data)
    line = f.readline()
    disease_array = []
    while line:
        #splitted = line.split('/')
        #file_name = str(splitted[1][0:8])
        # Each line is a record path; strip the trailing newline.
        file_name = line[0:-1]
        #patient_folder = str(splitted[0])
        total_path = filepath + file_name
        print total_path
        #try:
        signal, ecgrecord = wfdb.rdsamp(total_path)
        record = wfdb.rdsamp(total_path)
        #print ecgrecord['comments'][4][22:]
        signal_length = len(signal)
        # Number of whole non-overlapping windows in the record.
        repetition = int(math.floor(signal_length / samplenumber))
        samplelength = 0
        undecomposed = 0
        # Use the first 70% of windows.
        stop = int(math.ceil(repetition * 0.7))
        ########### Trining and Test Data Spliting ######################
        #Training data prepare
        for j in range(0, stop):
            write_signal = []
            # Sum the first 12 leads sample-by-sample.
            for sample in range(samplelength, samplelength + samplenumber):
                ecg_signal = 0
                for i1 in range(0, 12):
                    ecg_signal = ecg_signal + signal[sample][i1]
                write_signal.append(ecg_signal)
            EMD_signal = np.asarray(write_signal)
            #try:
            decomposer = EMD(EMD_signal, n_imfs=number_of_IMFs, maxiter=3000)
            imfs = decomposer.decompose()
            #print len(imfs)
            # "Modified" signal = per-sample sum of the first three IMFs.
            modified_EMD = []
            for h in range(0, samplenumber):
                modified_EMD.append(imfs[0][h] + imfs[1][h] + imfs[2][h])
            ### Plot data
            fig = plt.figure(figsize=(25, 15))
            plt.subplot(2, 1, 1)
            plt.plot(EMD_signal)
            plt.ylabel('Original Signal\n Amplitude', labelpad=15, fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.subplot(2, 1, 2)
            plt.plot(modified_EMD)
            plt.ylabel('Modified Signal \n Amplitude', labelpad=15, fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.xlabel('Sample Number', fontsize=35)
            fig.tight_layout()
            plt.savefig('modified_ECG_Petersburg.eps', format='eps', dpi=6000)
            plt.show()
            samplelength = samplelength + samplenumber
        line = f.readline()
    f.close()
    problem_data.close()
    print disease_array
def test_decomposition(self):
    """Test the decompose method of the emd class."""
    combined = self.trend + self.mode1 + self.mode2
    imfs = EMD(combined, t=self.ts).decompose()
    # Expect three rows of full signal length (order-insensitive check).
    self.assertItemsEqual(imfs.shape, (combined.shape[0], 3))
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
from pyhht.emd import EMD
from pyhht.visualization import plot_imfs

# Load the time-series data (settle price only).
data = pd.read_csv('gold_data.csv', usecols=['settle'])

# Empirical Mode Decomposition of the settle series.
x = data['settle']
imfs = EMD(x).decompose()

# Plot the decomposition.
plot_imfs(x, imfs, data.index)

# Persist the IMFs together with the original series, one column each.
stacked = np.vstack((imfs, x))
pd.DataFrame(stacked.T).to_csv('D:/imf.csv', index=None, columns=None)
for i in range(partCount): startIndex = i * partLen endIndex = (i + 1) * partLen # temporarily adding neighbor parts for more accurate calculations # todo - hh : only half or quarter of neighbor parts can be enough? if i > 0: # if not first part startIndex -= partLen if i < partCount - 2: # until second from last part endIndex += partLen if i == partCount - 2: # second from last part (last part's len may not be partLen) endIndex += len(sig) % partLen part = sig[startIndex:endIndex] # calculate imfs for the part decomposer = EMD(part) imfsPart = decomposer.decompose()[:-1] # last element is residue # calculate instant frequency for each imf of the part instfPart = [] magPart = [] truncatedImfs = [] for imf in imfsPart: hx = sp.hilbert(imf) mag = np.abs(hx) phx = np.unwrap(np.arctan2(hx.imag, hx.real)) tempInstf = sampRate / (2 * np.pi) * np.diff(phx) # removing neighbor parts after calculations if i > 0: # not first part tempInstf = tempInstf[partLen:]
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 name_data = '000001' path = '../data/'+name_data+'.csv' # 数据的地址 df_01 = pd.read_csv(path, encoding='gbk') # 读取数据 df_data = df_01[['Date', 'Close']].set_index('Date').iloc[::-1] # 把数据按日期排列 df_data['Close'] = df_data['Close'].astype('float64') data = np.array(df_data) data = np.reshape(data, (len(data),)) # 转换成(sample,)np.array diff = data_trans(data) # 转化为百分比数据 plt.figure(1) plt.plot(diff, label='差分后序列') plt.plot(data, linestyle='--', label='原始序列') plt.legend() plt.figure(figsize=(6, 9)) decomposer = EMD(diff) imfs = decomposer.decompose() num_imfs = imfs.shape[0] plt.subplot(num_imfs+1, 1, 1) plt.plot(diff) plt.ylabel("original") for n in range(num_imfs-1): plt.subplot(num_imfs+1, 1, n+2) plt.plot(imfs[n]) plt.ylabel("imf %i" % (n+1)) plt.subplot(num_imfs+1, 1, num_imfs+1) plt.plot(imfs[-1]) plt.ylabel("res") plt.show()
app_train = app_train.reset_index(drop=True)
print(str(name + 1) + '.csv' + "merge Done\n")
print('merge shape: ', app_train.shape)
#提取统计量
# Split the merged frame into 600 equal slices and EMD-decompose every
# column of each slice, then compute per-column statistics.
plus_cells = int(app_train.shape[0] / 600)
for k in range(0, 600):
    # NOTE(review): the `- 1` drops the last row of every slice — confirm intended.
    app_train_temp_origin = app_train[plus_cells * k:plus_cells * (k + 1) - 1]
    app_train_temp_origin = app_train_temp_origin.reset_index(drop=True)
    app_train_temp = pd.DataFrame()
    for col in app_train_temp_origin.columns:
        x = app_train_temp_origin[col]
        decomposer = EMD(x, n_imfs=5)
        imfs = decomposer.decompose()
        #app_train_temp = pd.DataFrame(imfs.T,columns = ['imf1_'+col,'imf2_'+col,'imf3_'+col,'imf4_'+col,'imf5_'+col,'imf6_'+col,'imf7_'+col,'imf8_'+col,'res_'+col])
        # IMF columns are numbered from 0 here ('imf0...'), unlike the
        # commented-out variant above which started at 'imf1_'.
        col_names = list()
        for i in range(0, len(imfs) - 1):
            col_names.append('imf' + str(i) + col)
        col_names.append('res' + col)
        imf_df = pd.DataFrame(imfs.T, columns=col_names)
        app_train_temp = pd.concat([app_train_temp, imf_df], axis=1)
    app_train_merge_sensor_s = pd.DataFrame()
    #方差
    # Variance of every decomposed column, as a single-row frame.
    var_temp = app_train_temp.var()
    app_train_merge_sensor_s = pd.DataFrame([var_temp.values], columns=var_temp.index + '_var')
def Main(): samplenumber = 5000 File_Path = './Database/MIT-BIH' samp_rating = 360 dir_files1=[] for (dirpath, dirnames, filenames) in os.walk(File_Path): dir_files1 += [os.path.join(File_Path, file[0:-4]) for file in filenames] dir_files = list(set(dir_files1)) dir_files.sort() print dir_files Read_Files = [] avg_min_RR_emd = [] avg_max_RR_emd = [] avg_avg_RR_emd = [] avg_ratio_emd = [] avg_coeff_emd = [] avg_min_RR_orig = [] avg_max_RR_orig = [] avg_avg_RR_orig = [] avg_ratio_orig = [] avg_coeff_orig = [] Diseases = [] ##### Save the Data A = open('./Analysis/MIT-BIH/Analysis_avg_avg_RR.csv','w') B = open('./Analysis/MIT-BIH/Analysis_avg_ratio.csv','w') C = open('./Analysis/MIT-BIH/Analysis_avg_coeff.csv','w') A.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n') B.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n') C.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n') for j in range(0,len(dir_files)): try: print dir_files[j], original_signal,ecgrecord = wfdb.srdsamp(dir_files[j]) record = wfdb.rdsamp(dir_files[j]) data_file = dir_files[j][-3:] sig_diseases = globals()['disease_'+str(data_file)] for gf in sig_diseases: time = globals()['beats_disease_'+str(data_file)][gf] time_split = time.split(':') minutes = time_split[0] seconds = time_split[1] total_seconds = int(minutes)*60 + int(seconds) total_samples = total_seconds * samp_rating disease = gf print gf, initial_start = 0 # per record starting index of each disease of that record ECG_signal = original_signal[initial_start:total_samples] sig_length = len(ECG_signal) print 'original sig length ', len(original_signal), print 'cut_signal_length ',sig_length, repetition = int(math.floor(sig_length/samplenumber)) print 'repeat ', repetition, sig_start = 0 count = 0 for h in range(0,repetition): signal = [] for i in range(sig_start,sig_start+samplenumber): signal.append(ECG_signal[i][0]+ECG_signal[i][1]) try: 
RR_orig,RR_time_orig,min_RR_orig,max_RR_orig,Average_RR_orig,Ratio_orig,Individual_coeff_orig,Avg_coeff_orig, Avg_template_orig, Individual_Beats_orig = ECG_analysis(signal[0:samplenumber],show=False,sampling_rate=samp_rating) #Read_Files.append(dir_files[j]) #EMD Analysis signal_for_EMD = np.asarray(signal[0:samplenumber]) decomposer = EMD(signal_for_EMD,n_imfs=3,maxiter=3000) imfs = decomposer.decompose() EMD_data = [] for i in range(0,samplenumber): EMD_data.append(imfs[0][i]+imfs[1][i]+imfs[2][i]) RR_emd,RR_time_emd,min_RR_emd,max_RR_emd,Average_RR_emd,Ratio_emd,Individual_coeff_emd,Avg_coeff_emd,Avg_template_emd, Individual_Beats_emd = ECG_analysis(EMD_data[0:samplenumber],show=False,sampling_rate=samp_rating) # Print #print min_RR_emd, ',', min_RR_orig #print max_RR_emd,',',max_RR_orig #print 'AVG_RR_emd=',Average_RR_emd,' Avg_RR_orig=' ,Average_RR_orig, #print Ratio_emd,',',Ratio_orig print 'Emd_coeff=',Avg_coeff_emd,' Orig_coeff=',Avg_coeff_orig, print 'start=',sig_start,' count=',count ''' avg_min_RR_emd.append(min_RR_emd) avg_max_RR_emd.append(max_RR_emd) avg_avg_RR_emd.append(Average_RR_emd) avg_ratio_emd.append(Ratio_emd) avg_coeff_emd.append(Avg_coeff_emd) avg_min_RR_orig.append(min_RR_orig) avg_max_RR_orig.append(max_RR_orig) avg_avg_RR_orig.append(Average_RR_orig) avg_ratio_orig.append(Ratio_orig) avg_coeff_orig.append(Avg_coeff_orig) ''' #Diseases.append(disease) sig_start = sig_start + samplenumber A.write(dir_files[j]+','+str(Average_RR_emd)+','+str(Average_RR_orig)+','+disease+'\n') B.write(dir_files[j]+','+str(Ratio_emd)+','+str(Ratio_orig)+','+disease+'\n') C.write(dir_files[j]+','+str(Avg_coeff_emd)+','+str(Avg_coeff_orig)+','+disease+'\n') count += 1 except: sig_start = sig_start + samplenumber print 'Problem in the cut sequencee' initial_start = total_samples except: print 'Problem: ',dir_files[j][-7:] '''
from pyhht.visualization import plot_imfs
import matplotlib.pyplot as plt
sys.path.append('data/')
from read_PLAID_data import read_source_data

# Paths to the sub-metered PLAID source data and its metadata.
source_dir = 'data/source/submetered_new_pured/source/'
with open(
        '/home/chaofan/powerknowledge/data/source/metadata_submetered2.1.json',
        'r',
        encoding='utf8') as load_meta:
    meta = json.load(load_meta)

length1 = 3000
length2 = 3000
t = range(3000)
csv_dir = os.listdir(source_dir)
for file in csv_dir:
    file_dir = source_dir + file
    # Current at switch-on (offset 0) vs. steady state (offset 3000).
    Switch_V, Switch_I = read_source_data(file_dir, offset=0, length=length1)
    Stable_V, Stable_I = read_source_data(file_dir, offset=3000, length=length2)
    # Transient component = switching current minus steady-state current.
    tem = np.array(Switch_I) - np.array(Stable_I)
    decomposer = EMD(tem, n_imfs=3)
    imfs = decomposer.decompose()
    plot_imfs(tem, imfs, t)
@URL:
@version: V1.0
'''
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pyhht.emd import EMD
from pyhht.visualization import plot_imfs

# Column layout of the lottery results file.
column_names = [
    'term', 'date', 'red1', 'red2', 'red3', 'red4', 'red5', 'red6', 'blue',
    'appear1', 'appear2', 'appear3', 'appear4', 'appear5', 'appear6', 'prize',
    'Total', 'first_num', 'first_amount', 'second_num', 'second_amount',
    'third_num', 'third_amount', 'fourth_num', 'fourth_amount', 'fifth_num',
    'fifth_amount', 'sixth_num', 'sixth_amount'
]
data = pd.read_csv('F:/workspace/Tensorflow/src/ssq/ssq.txt',
                   sep=' ',
                   header=None,
                   names=column_names)
print(data.info())

# Plot the raw red1 series.
plt.figure(figsize=(15, 5))
x = range(0, len(data['red1']), 1)
plt.plot(x, data['red1'])
plt.show()

# EMD-decompose red1.
decomposer = EMD(data['red1'])
imfs = decomposer.decompose()
print(imfs.shape)
# NOTE(review): the decomposition used 'red1' but the plot is drawn against
# 'red2' — looks like a copy-paste slip; confirm which column is intended.
plot_imfs(data['red2'].values, imfs, None)
def EMD_data_preparation(filepath,patient_data,csv_folder,problem_data_file,samplenumber,number_of_IMFs,split_perc):
    """Decompose PTB ECG records with EMD and write per-IMF train/test CSVs.

    Python 2 code (uses print statements). For each record listed in
    `patient_data`, the 15 ECG channels are summed into one composite signal,
    split into overlapping windows of `samplenumber` samples, each window is
    decomposed with pyhht's EMD, and every IMF row (samples + disease label)
    is appended to IMF<n>_train.csv / IMF<n>_test.csv under `csv_folder`.

    Parameters:
        filepath          -- root directory containing the patient folders.
        patient_data      -- text file listing records as 'patient_folder/record...'.
        csv_folder        -- output directory prefix for the IMF CSV files.
        problem_data_file -- file collecting records that could not be read.
        samplenumber      -- samples per decomposition window.
        number_of_IMFs    -- IMFs requested from EMD (2..6 are written out).
        split_perc        -- fraction of each record's windows used for training.
    """
    # Remove stale outputs.
    # NOTE(review): the glob is the hard-coded './csv_folder/*', not the
    # `csv_folder` argument — presumably they refer to the same directory;
    # confirm before reuse.
    files = glob.glob('./csv_folder/*')
    for f in files:
        os.remove(f)
    problem_data=open(problem_data_file,'w')
    #PTB Diagnostic ECG database Disease labels
    # Diagnoses folded into broader classes below.
    miscle=['Stable angina','Palpitation', 'Unstable angina']
    cardiom=['Heart failure (NYHA 4)', 'Heart failure (NYHA 3)', 'Heart failure (NYHA 2)']
    # 15 channel names (12 standard + 3 Frank leads); the inner loops below sum
    # exactly 15 channels per sample to build the composite signal.
    ecg_lead = ['i','ii','iii','avr','avl','avf','v1','v2','v3','v4','v5','v6','vx','vy','vz']
    # Per-disease record counts (informational only; not read below).
    Sig_Records = {'Bundle branch block': 38092, 'Valvular heart disease': 37647, 'Myocarditis': 39672, 'Healthy control': 37500, 'Dysrhythmia': 39557, 'Myocardial infarction': 38951, 'Cardiomyopathy': 37659}
    # All output CSVs are opened in append mode; one file per IMF per split.
    unIMFs = open('./Problem_Data/unIMFs.csv','a')
    IMF1_train = open(csv_folder+'IMF1_train.csv', 'a')
    IMF2_train = open(csv_folder+'IMF2_train.csv', 'a')
    IMF1_test = open(csv_folder+'IMF1_test.csv', 'a')
    IMF2_test = open(csv_folder+'IMF2_test.csv', 'a')
    # Per-window decomposition timings.
    Train_time = open('Train_time.csv','a')
    Test_time = open('Test_time.csv','a')
    # Higher-order IMF files only exist when requested.
    if number_of_IMFs >= 3:
        IMF3_train = open(csv_folder+'IMF3_train.csv', 'a')
        IMF3_test = open(csv_folder+'IMF3_test.csv', 'a')
    if number_of_IMFs >= 4:
        IMF4_train = open(csv_folder+'IMF4_train.csv', 'a')
        IMF4_test = open(csv_folder+'IMF4_test.csv', 'a')
    if number_of_IMFs >= 5:
        IMF5_train = open(csv_folder+'IMF5_train.csv', 'a')
        IMF5_test = open(csv_folder+'IMF5_test.csv', 'a')
    if number_of_IMFs == 6:
        IMF6_train = open(csv_folder+'IMF6_train.csv', 'a')
        IMF6_test = open(csv_folder+'IMF6_test.csv', 'a')
    f = open(patient_data)
    line = f.readline()
    disease_array=[]
    file_count = 0
    while line:
        file_count += 1
        # NOTE(review): while file_count < 1000 the line is only echoed, never
        # processed, and the counter is bumped twice per iteration — so roughly
        # the first 500 list entries are skipped. Confirm this is intended.
        if file_count < 1000:
            line = f.readline()
            file_count += 1
            print line, file_count
        else:
            file_count += 1
            # Entry format: '<patient_folder>/<record>'; record name truncated
            # to its first 8 characters.
            splitted = line.split('/')
            file_name = str(splitted[1][0:8])
            patient_folder = str(splitted[0])
            total_path = filepath+patient_folder+'/'+file_name
            print patient_folder,'---',file_name,
            #print total_path
            try:
                # signal: (n_samples, 15) array; ecgrecord: header fields.
                signal,ecgrecord = wfdb.srdsamp(total_path)
                record = wfdb.rdsamp(total_path)
                # comments[4][22:] carries the diagnosis string.
                print ecgrecord['comments'][4][22:],
                signal_length = len(signal)
                #repetition = int(math.floor(signal_length/samplenumber))
                if not ecgrecord['comments'][4][22:] == 'n/a':
                    disease = ecgrecord['comments'][4][22:]
                    # Collapse related diagnoses into broader classes.
                    if disease in miscle:
                        disease = "Miscellaneous"
                    elif disease in cardiom:
                        disease = "Cardiomyopathy"
                    # Per-class sliding-window step (smaller step = more
                    # windows, presumably to balance class sizes — confirm).
                    if disease == 'Myocardial infarction':
                        overlap = 1000
                    elif disease == "Bundle branch block":
                        overlap = 55
                    elif disease == "Cardiomyopathy":
                        overlap = 55
                    elif disease == "Dysrhythmia":
                        overlap = 35
                    elif disease == "Healthy control":
                        overlap = 255
                    elif disease == "Myocarditis":
                        overlap = 15
                    elif disease == "Valvular heart disease":
                        overlap = 15
                    if disease not in disease_array:
                        disease_array.append(disease)
                    samplelength = 0
                    undecomposed = 0
                    sig_start_ov = 0
                    repetition = 0
                    # Count how many windows of `samplenumber` samples fit when
                    # advancing by `overlap` each time.
                    while(signal_length-sig_start_ov >= samplenumber):
                        repetition += 1
                        sig_start_ov += overlap
                    # First `stop` windows -> training set, rest -> test set.
                    stop = int(math.ceil(repetition*split_perc))
                    print 'repetition = ',repetition
                    ########### Trining and Test Data Spliting ######################
                    #Training data prepare
                    for j in range(0,stop):
                        # Sum the 15 channels into one composite sample stream
                        # for this window.
                        write_signal = []
                        for sample in range(samplelength,samplelength+samplenumber):
                            ecg_signal = 0
                            for i1 in range(0,15):
                                ecg_signal = ecg_signal+signal[sample][i1]
                            write_signal.append(ecg_signal)
                        EMD_signal = np.asarray(write_signal)
                        try:
                            start_time_train = time.time()
                            decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
                            imfs = decomposer.decompose()
                            #Construct Modified EMD
                            # Sum of the first three IMFs (computed but not
                            # written anywhere in this function).
                            modified_EMD_train = []
                            for q in range(0,samplenumber):
                                modified_EMD_train.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
                            elapsed_time_train = time.time() - start_time_train
                            Train_time.write(total_path+','+disease+','+str(elapsed_time_train)+'\n')
                            #print len(imfs)
                            # One CSV row per IMF: samples, then disease label.
                            str1 = str(imfs[0][0])
                            str2 = str(imfs[1][0])
                            # decompose() returns n_imfs IMFs plus the residue,
                            # hence the +1 check.
                            if (len(imfs) == number_of_IMFs+1):
                                for h in range(1,samplenumber):
                                    str1 = str1+','+str(imfs[0][h])
                                    str2 = str2+','+str(imfs[1][h])
                                str1 = str1+','+disease+'\n'
                                str2 = str2+','+disease+'\n'
                                IMF1_train.write(str1)
                                IMF2_train.write(str2)
                                if number_of_IMFs >= 3:
                                    str3 = str(imfs[2][0])
                                    for h in range(1,samplenumber):
                                        str3 = str3+','+str(imfs[2][h])
                                    str3 = str3+','+disease+'\n'
                                    IMF3_train.write(str3)
                                if number_of_IMFs >= 4:
                                    str4 = str(imfs[3][0])
                                    for h in range(1,samplenumber):
                                        str4 = str4+','+str(imfs[3][h])
                                    str4 = str4+','+disease+'\n'
                                    IMF4_train.write(str4)
                                if number_of_IMFs >= 5:
                                    str5 = str(imfs[4][0])
                                    for h in range(1,samplenumber):
                                        str5 = str5+','+str(imfs[4][h])
                                    str5 = str5+','+disease+'\n'
                                    IMF5_train.write(str5)
                                if number_of_IMFs==6:
                                    str6 = str(imfs[5][0])
                                    for h in range(1,samplenumber):
                                        str6 = str6+','+str(imfs[5][h])
                                    str6 = str6+','+disease+'\n'
                                    IMF6_train.write(str6)
                            else:
                                print ('IMF Number do not match')
                                undecomposed = undecomposed + 1
                            # Slide the window forward for the next iteration.
                            samplelength = samplelength+overlap
                        except:
                            # NOTE(review): bare except — any failure inside
                            # decomposition is swallowed; window still advances.
                            print 'Could not be decomposed'
                            samplelength = samplelength+overlap
                    #Testing data preparation
                    # Identical to the training loop but appends to the *_test
                    # files and the Test_time log.
                    for j in range(stop,repetition):
                        write_signal = []
                        for sample in range(samplelength,samplelength+samplenumber):
                            ecg_signal = 0
                            for i1 in range(0,15):
                                ecg_signal = ecg_signal+signal[sample][i1]
                            write_signal.append(ecg_signal)
                        EMD_signal = np.asarray(write_signal)
                        try:
                            start_time_test = time.time()
                            decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
                            imfs = decomposer.decompose()
                            #Construct Modified EMD
                            modified_EMD_test = []
                            for q in range(0,samplenumber):
                                modified_EMD_test.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
                            elapsed_time_test = time.time() - start_time_test
                            Test_time.write(total_path+','+disease+','+str(elapsed_time_test)+'\n')
                            #print len(imfs)
                            str1 = str(imfs[0][0])
                            str2 = str(imfs[1][0])
                            if (len(imfs) == number_of_IMFs+1):
                                for h in range(1,samplenumber):
                                    str1 = str1+','+str(imfs[0][h])
                                    str2 = str2+','+str(imfs[1][h])
                                str1 = str1+','+disease+'\n'
                                str2 = str2+','+disease+'\n'
                                IMF1_test.write(str1)
                                IMF2_test.write(str2)
                                if number_of_IMFs >= 3:
                                    str3 = str(imfs[2][0])
                                    for h in range(1,samplenumber):
                                        str3 = str3+','+str(imfs[2][h])
                                    str3 = str3+','+disease+'\n'
                                    IMF3_test.write(str3)
                                if number_of_IMFs >= 4:
                                    str4 = str(imfs[3][0])
                                    for h in range(1,samplenumber):
                                        str4 = str4+','+str(imfs[3][h])
                                    str4 = str4+','+disease+'\n'
                                    IMF4_test.write(str4)
                                if number_of_IMFs >= 5:
                                    str5 = str(imfs[4][0])
                                    for h in range(1,samplenumber):
                                        str5 = str5+','+str(imfs[4][h])
                                    str5 = str5+','+disease+'\n'
                                    IMF5_test.write(str5)
                                if number_of_IMFs==6:
                                    str6 = str(imfs[5][0])
                                    for h in range(1,samplenumber):
                                        str6 = str6+','+str(imfs[5][h])
                                    str6 = str6+','+disease+'\n'
                                    IMF6_test.write(str6)
                            else:
                                print ('IMF Number do not match')
                                undecomposed = undecomposed + 1
                            samplelength = samplelength+overlap
                        except:
                            print 'Could not be decomposed'
                            samplelength = samplelength+overlap
                    # Log how many windows of this record failed the IMF-count
                    # check.
                    string = patient_folder+'---'+file_name+'UNIMFed Records = '+str(undecomposed)+'\n'
                    unIMFs.write(string)
                line = f.readline()
            except:
                # Record could not be read at all; log it and move on.
                problem=patient_folder+'/'+file_name+'\n'
                problem_data.write(problem)
                line = f.readline()
                print sys.exc_info(),'\n'
    f.close()
    problem_data.close()
    print disease_array
    IMF1_train.close()
    IMF2_train.close()
    IMF1_test.close()
    IMF2_test.close()
    if number_of_IMFs>=3:
        IMF3_train.close()
        IMF3_test.close()
    if number_of_IMFs>=4:
        IMF4_train.close()
        IMF4_test.close()
    if number_of_IMFs>=5:
        IMF5_train.close()
        IMF5_test.close()
    if number_of_IMFs==6:
        IMF6_train.close()
        IMF6_test.close()
    unIMFs.close()
plt.show() else: print(colored("You have to do at least one EMD first.", 'red')) continue if name not in windset.keys(): print(colored('This dataset is not exist', 'red')) continue cut = raw_input('Cut the zeros head and end?[y/n]') if cut == 'y': cutindex = [ np.nonzero(windset[name])[0][0], np.nonzero(windset[name])[0][-1] ] realwindset = windset[name][cutindex[0]:cutindex[1] + 1] else: realwindset = windset[name] x = np.linspace(1, len(realwindset), len(realwindset)) decomposer = EMD(realwindset) imfs = decomposer.decompose() size = imfs.shape plt.figure() plt.plot(x, realwindset) plt.title(name) plt.show() plt.figure(figsize=(20, 18)) for loop in range(1, size[0] + 1): plt.subplot(size[0], 1, loop) plt.plot(x, imfs[loop - 1]) plt.title(loop) plt.show()
# ene[i]=dum.sum
# print(ene[i])
# print(ener)

# Convert the energy trace to dB and normalise it to its peak, then plot it
# against the selected time indices.
enerdb = 10 * np.log10(ener)
ener = ener / max(ener)
plt.plot(time[indxt], ener)
plt.show()
plt.plot(vtime, tmp)
plt.xlim(tmin, tmax)
plt.show()

# In[33]:

# Empirical Mode Decomposition of the filtered trace.
npt = len(trNfil[0].data)
emd = EMD(trNfil[0].data)
imfs = emd.decompose()
# NOTE(review): this rebinds `time` (indexed as an array above) to the
# sample-time axis; any later use of the old `time` would break.
time = (np.linspace(1, npt, npt)) * dt
plt.rcParams["figure.figsize"] = (30.0, 50.0)
# Bug fix: pyhht's plot_imfs signature is (signal, imfs, time) — the original
# call passed (signal, time, imfs), swapping the last two arguments (every
# other call in this file uses the (x, imfs, t) order).
plot_imfs(trNfil[0].data, imfs, time)

# In[44]:

# Plot the trigger and report the first sample where the characteristic
# function exceeds the 2.2 activation threshold.
aa = trNfil[2].copy()
plotTrigger(aa, cft, 2.2, 0.5)
dm = len(cft)
item = [i for i in range(dm) if cft[i] > 2.2]
print(min(item))