def load_data_emd(trainNum, testNum, startNum, data):
    """Slice and standardize a series, EMD-decompose it, and build per-IMF
    train/test datasets via create_data.

    Returns a list with one entry per IMF: [trainX, trainY, testX, testY].
    Side effects: rebinds the module-global scaler_target (needed later to
    inverse-transform predictions) and reads the module-global ahead_num.
    """
    print('EMD_data loading.')
    global ahead_num
    # all_data_checked = data
    targetData = data
    # Slice out the prediction window: trainNum + testNum points after startNum.
    targetData = targetData[startNum + 1: startNum + trainNum + testNum + 1]
    targetData = np.array(targetData).reshape(-1, 1)
    # Standardize (zero mean / unit variance); each feature scaled separately.
    global scaler_target
    scaler_target = StandardScaler(copy=True, with_mean=True, with_std=True)
    targetData = scaler_target.fit_transform(targetData)
    decomposer = EMD(targetData)
    imfs = decomposer.decompose()
    # plot_imfs(targetData, imfs)
    data_decomposed = imfs.tolist()
    # Each IMF becomes a column vector for create_data.
    for h1 in range(len(data_decomposed)):
        data_decomposed[h1] = np.array(data_decomposed[h1]).reshape(-1, 1)
    # Replace each IMF with its [trainX, trainY, testX, testY] split.
    for h2 in range(len(data_decomposed)):
        trainX, trainY, testX, testY = create_data(data_decomposed[h2], trainNum, ahead_num)
        dataset_imf = [trainX, trainY, testX, testY]
        data_decomposed[h2] = dataset_imf
    print('load_data complete.\n')
    return data_decomposed
def emd_plot(data):
    """EMD-decompose *data*, plot the resulting IMFs and return them."""
    imfs = EMD(data).decompose()
    # ipdb.set_trace()
    plot_imfs(data, imfs)
    plt.legend('EMD')
    return imfs
def test_imfs_total_no_error(self):
    """
    Check if the sum of the IMFs is sufficiently close to the input signal.
    """
    signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
    emd = EMD(signal)
    imfs = emd.decompose()
    # EMD is a complete decomposition: summing all IMF rows (axis 0) must
    # reconstruct the original signal within floating-point tolerance.
    assert_allclose(imfs.sum(0), signal)
def test_residue(self):
    """Test the residue of the emd output."""
    signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
    decomposer = EMD(signal, t=self.ts)
    imfs = decomposer.decompose()
    n_imfs = imfs.shape[0]
    # The last row of `imfs` is the residue; a valid residue is (near-)monotonic,
    # so it can have at most a couple of local extrema.
    n_maxima = argrelmax(imfs[n_imfs - 1, :])[0].shape[0]
    n_minima = argrelmin(imfs[n_imfs - 1, :])[0].shape[0]
    self.assertTrue(max(n_maxima, n_minima) <= 2)
def test_monotonicity_of_trend(self):
    """
    Check if the trend is monotonic.
    """
    signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
    emd = EMD(signal)
    imfs = emd.decompose()
    # There should be two IMFs, and the rest of them are trends
    # NOTE(review): despite the docstring, this asserts that the summed trend
    # rows reconstruct self.trend, not monotonicity per se.
    trend = imfs[3:, :].sum(0)
    assert_allclose(self.trend, trend)
def hilbert_huang(self):
    """
    Create EMD and Calculate Hilbert-Huang: decompose every sub-series of
    self.reshaped_x and return the stacked IMF arrays.
    """
    collected = []
    for row in self.reshaped_x:
        for series in row:
            collected.append(EMD(series).decompose())
    return np.array(collected)
def get_data(l, lag=3):
    """
    Default lag of 3, one-step-ahead prediction.
    The last element of l does not take part in the decomposition.
    """
    decomposer = EMD(l[:-1])  # the last element of l is excluded from the decomposition
    imfs = decomposer.decompose()  # m IMFs plus one residue row
    # Build the input samples; the first sample has shape (1, lag, m+1), i.e.
    # lag lagged steps, each carrying m+1 elements:
    # [[imf1_1,imf2_1,...,imfm_1,res_1],[imf1_2,...,imfm_2,res_2],...,[imf1_lag,...,imfm_lag,res_lag]]
    x = seq_tf_matrix(imfs.T, lag)
    # y is the target: the true one-step-ahead values aligned with x
    y = l[-len(x):]
    return x, y
def EEMD(sample, num_iterations):
    """Ensemble Empirical Mode Decomposition.

    Runs EMD num_iterations times on white-noise-perturbed copies of *sample*
    and averages the IMF stacks, grouped by the number of IMFs produced.

    Returns a dict mapping n_imfs -> averaged IMF array of that shape.
    """
    imf = {}
    counts = {}  # how many trials produced each IMF-stack shape
    for _ in range(0, num_iterations):
        white_noise = generateWhiteNoise(len(sample))
        x = white_noise + sample
        decomp = EMD(x, maxiter=10000)
        imfX = decomp.decompose()
        k = imfX.shape[0]
        try:
            imf[k] += imfX
            counts[k] += 1
        except KeyError:
            imf[k] = imfX
            counts[k] = 1
    # BUG FIX: the original divided by `key` (the number of IMFs in the stack);
    # the ensemble average must divide by the number of trials that produced
    # that stack shape.
    for key in imf:
        imf[key] /= counts[key]
    return imf
def EMD_data_preparation(csv_folder,samplenumber,train_list):
    # Build an "ensembled" training CSV: each input row is EMD-decomposed and
    # the first three IMFs are summed into one denoised signal, written back
    # out together with its disease label.
    ########### Trining and Test Data Spliting ######################
    Ensembled_train = open(csv_folder+'Ensembled_train.csv', 'w')
    Total_data = 0
    #Training data prepare
    F = open(train_list,'r')
    line = F.readline()
    while line:
        Original_signal = []
        splitted = line.split(',')
        for h in range(0,samplenumber):
            Original_signal.append(float(splitted[h]))
        disease = splitted[-1][:-1]  # label is the last field, minus the trailing newline
        Original_signal = np.asarray(Original_signal)
        try:
            decomposer = EMD(Original_signal,n_imfs=3,maxiter=3000)
            imfs = decomposer.decompose()
            # Denoised signal = sum of the first three IMFs.
            ensembled_data = []
            for h in range(0,samplenumber):
                ensembled_data.append(imfs[0][h]+imfs[1][h]+imfs[2][h])
            Total_data = Total_data+1
            # Serialize with 8 decimal places, comma-separated, label last.
            string = str(float("{0:.8f}".format(ensembled_data[0])))
            for h in range(1,samplenumber):
                string = string +','+str(float("{0:.8f}".format(ensembled_data[h])))
            string = string+','+disease+'\n'
            Ensembled_train.write(string)
            print 'Train Data = ',Total_data,'---Disease = ',disease
            line = F.readline()
        except:
            # NOTE(review): bare except — best-effort skip of rows that fail
            # to decompose or serialize; hides the real error.
            print 'Could not Write'
            line = F.readline()
    Ensembled_train.close()
    #Ensembled_test.close()
    F.close()
def optIMFPrediction():
    """Grid-search the SVR regularization parameter C for each IMF of the
    min-max-scaled close-price series, printing the best C per IMF."""
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    close_prices = df[5].values[:]
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    svrlist = []  # NOTE(review): unused
    predYVals = np.matrix([])  # NOTE(review): unused
    tscv = TimeSeriesSplit(n_splits=500)  # NOTE(review): unused — kf below is the CV actually passed in
    kf = KFold(n_splits=10, shuffle=True)
    for i in range(imf.shape[0]):
        # Rolling windows of width 500 over the first 3000 points of this IMF.
        x, y = rollingWindows(imf[i], 500, 0, 3000)
        svr = svm.SVR(cache_size=1000)
        parameters = {
            'C': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
        }
        reg = GridSearchCV(svr, parameters, cv=kf, n_jobs=-1)
        reg.fit(x, y)
        print(reg.best_params_)
    return
def testWithIMFPrediction():
    """Fit one SVR per selected IMF of the scaled close prices on rolling
    windows, then feed the per-IMF predictions into a second SVR to predict
    prices, reporting MSE against the truth and a naive lag-1 baseline."""
    t = time.time()
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    #plt.show()
    close_prices = df[5].values[:]
    print(len(close_prices))
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    plot_imfs(close_prices, imf)
    plt.plot(hilbert(imf, axis=0).T)
    plt.show()
    svrlist = []
    predYVals = np.matrix([])
    for i in range(7, 8):
        x, y = rollingWindows(imf[i], 500, 0, 2500)
        if i == 7:
            svr = svm.SVR(C=0.1, cache_size=4000)
        else:
            # BUG FIX: was svm.SVR(c=10, ...) — SVR has no keyword 'c';
            # the regularization parameter is uppercase 'C'.
            svr = svm.SVR(C=10, cache_size=4000)
        svr.fit(x, y)
        svrlist.append(svr)
        testX, testY = rollingWindows(imf[i], 500, 3040, 3400)
        predY = np.matrix(svr.predict(testX)).T
        print(predY.shape)
        try:
            predYVals = np.concatenate([predYVals, predY], axis=1)
        except ValueError:
            # First iteration: predYVals is empty and cannot be concatenated.
            predYVals = np.matrix(predY)
    # Second stage: map per-IMF values back to prices.
    svr = svm.SVR()
    svr.fit(imf[7:8, 0:3000].T, close_prices[0:3000])
    predPrices = svr.predict(predYVals)
    print(mean_squared_error(close_prices[3540:3900], predPrices))
    # Baseline: yesterday's price as today's prediction.
    print(mean_squared_error(close_prices[3540:3900], close_prices[3539:3899]))
    print(time.time() - t)
def emd(x, ts, n_imfs):
    """Decompose *x* with pyhht's EMD into an (n_imfs + 1, len(x)) array.

    Row 0 and row 1 hold the first two IMFs when available (with fallbacks
    otherwise); the last row is the residue x - sum(earlier rows).
    `ts` is accepted for interface compatibility but unused here.
    """
    # BUG FIX: the original did `x.dtype = 'float64'`, which *reinterprets*
    # the underlying buffer in place (garbage values for integer input) and
    # mutates the caller's array. astype converts values and leaves the
    # caller's array untouched.
    x = np.asarray(x).astype('float64')
    imfs = np.zeros((n_imfs + 1, x.shape[0]))
    decomposer = EMD(x)
    allimfs = decomposer.decompose()
    if len(allimfs) > 0:
        imfs[0, :] = allimfs[0]
    else:
        # No IMFs extracted: row 0 carries the signal itself.
        imfs[0, :] = x - imfs.sum(0)
    if len(allimfs) > 1:
        imfs[1, :] = allimfs[1]
    else:
        imfs[1, :] = np.zeros((1, x.shape[0]))
    # Residue in the last row.
    imfs[-1, :] = x - imfs.sum(0)
    return imfs
def Main():
    # Batch-analyse PTB ECG records: per 5000-sample window, compare RR/beat
    # statistics computed on the raw 15-channel-summed signal vs. an
    # EMD-denoised version (sum of the first three IMFs); averages go to CSV.
    # load raw ECG signal
    #signal, mdata = storage.load_txt('./examples/ecg.txt')
    samplenumber = 5000
    File_Path = './Database/PTB/'
    samp_rating = 1000  # PTB sampling rate (Hz)
    dir_files1 = []
    for (dirpath, dirnames, filenames) in os.walk(File_Path):
        dir_files1 += [os.path.join(dirpath, file[0:-4]) for file in filenames]
    dir_files = list(set(dir_files1))  # de-duplicate record stems (.dat/.hea share a name)
    print dir_files
    Read_Files = []
    avg_min_RR_emd = []
    avg_max_RR_emd = []
    avg_avg_RR_emd = []
    avg_ratio_emd = []
    avg_coeff_emd = []
    avg_min_RR_orig = []
    avg_max_RR_orig = []
    avg_avg_RR_orig = []
    avg_ratio_orig = []
    avg_coeff_orig = []
    Diseases = []
    ##### Save the Data
    A = open('./Analysis/PTB/Analysis_avg_avg_RR.csv', 'w')
    B = open('./Analysis/PTB/Analysis_avg_ratio.csv', 'w')
    C = open('./Analysis/PTB/Analysis_avg_coeff.csv', 'w')
    A.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')
    B.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')
    C.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')
    for j in range(0, len(dir_files)):
        try:
            print dir_files[j],
            ECG_signal, ecgrecord = wfdb.srdsamp(dir_files[j])
            record = wfdb.rdsamp(dir_files[j])
            sig_length = len(ECG_signal)
            disease = ecgrecord['comments'][4][22:]  # diagnosis text after the header prefix
            print disease,
            #print record.__dict__
            repetition = int(math.floor(sig_length / samplenumber))
            sig_start = 0
            count = 0
            for h in range(0, repetition):
                signal = []
                for i in range(sig_start, sig_start + samplenumber):
                    sum = 0  # NOTE(review): shadows the builtin `sum`
                    for channel in range(0, 15):
                        sum += ECG_signal[i][channel]  # sum all 15 leads
                    signal.append(sum)
                try:
                    RR_orig, RR_time_orig, min_RR_orig, max_RR_orig, Average_RR_orig, Ratio_orig, Individual_coeff_orig, Avg_coeff_orig, Avg_template_orig, Individual_Beats_orig = ECG_analysis(
                        signal[0:samplenumber], show=False, sampling_rate=samp_rating)
                    #Read_Files.append(dir_files[j])
                    #EMD Analysis
                    signal_for_EMD = np.asarray(signal[0:samplenumber])
                    decomposer = EMD(signal_for_EMD, n_imfs=3, maxiter=2000)
                    imfs = decomposer.decompose()
                    EMD_data = []
                    for i in range(0, samplenumber):
                        EMD_data.append(imfs[0][i] + imfs[1][i] + imfs[2][i])
                    RR_emd, RR_time_emd, min_RR_emd, max_RR_emd, Average_RR_emd, Ratio_emd, Individual_coeff_emd, Avg_coeff_emd, Avg_template_emd, Individual_Beats_emd = ECG_analysis(
                        EMD_data[0:samplenumber], show=False, sampling_rate=samp_rating)
                    # Print
                    #print min_RR_emd, ',', min_RR_orig
                    #print max_RR_emd,',',max_RR_orig
                    #print 'AVG_RR_emd=',Average_RR_emd,' Avg_RR_orig=' ,Average_RR_orig,
                    #print Ratio_emd,',',Ratio_orig
                    print 'Emd_coeff=', Avg_coeff_emd, ' Orig_coeff=', Avg_coeff_orig,
                    print 'start=', sig_start, ' count=', count
                    '''
                    avg_min_RR_emd.append(min_RR_emd)
                    avg_max_RR_emd.append(max_RR_emd)
                    avg_avg_RR_emd.append(Average_RR_emd)
                    avg_ratio_emd.append(Ratio_emd)
                    avg_coeff_emd.append(Avg_coeff_emd)
                    avg_min_RR_orig.append(min_RR_orig)
                    avg_max_RR_orig.append(max_RR_orig)
                    avg_avg_RR_orig.append(Average_RR_orig)
                    avg_ratio_orig.append(Ratio_orig)
                    avg_coeff_orig.append(Avg_coeff_orig)
                    '''
                    #Diseases.append(disease)
                    sig_start = sig_start + samplenumber
                    A.write(dir_files[j] + ',' + str(Average_RR_emd) + ',' + str(Average_RR_orig) + ',' + disease + '\n')
                    B.write(dir_files[j] + ',' + str(Ratio_emd) + ',' + str(Ratio_orig) + ',' + disease + '\n')
                    C.write(dir_files[j] + ',' + str(Avg_coeff_emd) + ',' + str(Avg_coeff_orig) + ',' + disease + '\n')
                    count += 1
                except:
                    # Window failed analysis/decomposition — skip to the next one.
                    sig_start = sig_start + samplenumber
                    print 'Problem in the cut sequencee'
        except:
            print 'Problem: ', dir_files[j][-7:]
    # NOTE(review): dangling triple-quote below — it apparently opens a
    # commented-out region that continues beyond this block in the original file.
    '''
# Script fragment: compute long/short labels and uniqueness-based sample
# weights for the close prices, then EMD-decompose the scaled series and fit a
# KNN regressor per high-order IMF on rolling windows.
close_prices = df[5].values[:]
low_prices = df[4].values[:]
high_prices = df[3].values[:]
encodings = np.array([
    longShortEncoding(i, close_prices, high_prices, low_prices, 5, 0.05)
    for i in range(3600)
])
weights = sampleWeightsByUniqueness(encodings)
print(weights)
print(encodings[:, 0])
print(encodings.shape)
s = minmax_scale(close_prices)
emd = EMD(s, maxiter=3000)
imf = emd.decompose()
plot_imfs(s, imf)
predYVals = []
# Only the slower IMFs (index 7 upward) are modelled here.
for i in range(7, imf.shape[0]):
    # Train on windows over [0, 3000), evaluate on [3030, 3400).
    x, y = rollingWindows(imf[i], 30, 0, 3000)
    nn = KNeighborsRegressor(n_neighbors=4)
    nn.fit(x, y)
    x, y = rollingWindows(imf[i], 30, 3030, 3400)
    predYNN = nn.predict(x)
    print(y)
    print(predYNN)
    predYVals.append(predYNN)
clf2 = KNeighborsClassifier(n_neighbors=2)
""" Created on Wed Apr 10 17:57:02 2019 @author: Administrator """ from pyhht.emd import EMD import numpy as np import pandas as pd import matplotlib.pyplot as plt from pyhht.visualization import plot_imfs # 读取数据 #dataset = pd.read_csv('data_day.csv') stock_dir='../dataset/AAPL.csv' dataset = pd.read_csv(open(stock_dir),header=0) dataset=dataset[::-1] for col in dataset.columns: dataset=dataset['Open'] data = dataset.values s = data.ravel() #emd decomposer = EMD(s) IMF = decomposer.decompose() print(IMF.shape) imf_data = pd.DataFrame(IMF.T) imf_data.to_csv('../dataset/emd/emd_AAPL_'+str(col)+'.csv') #绘制分解图 plot_imfs(s,IMF)
def emd(self):
    """Remove the mean from the current waveform and return its EMD IMFs
    as nested Python lists."""
    waveform = np.array(self.get_waveform(), dtype=float)
    detrended = signal.detrend(waveform, type='constant')
    return EMD(detrended).decompose().tolist()
def multi_emd_aann(lag=3, num_trial=2, hidden=128, epochs=20, ignore=ignore): pre_data_tf_result = pd.DataFrame() # 百分比结果 real_result = pd.DataFrame() # 预测重构值 time_ = [] # 时间 mape, mae, mse, rmse = [], [], [], [] for j in range(num_trial): result = [] start_time = time.time() # 100(test_num)个测试样本 for k in range(test_num): decomposer = EMD(data_tf[:-test_num + k]) # 最后一项不参与分解 imfs = decomposer.decompose() # 包括m个imf和一个res项 pr = None for i in range(len(imfs)): d = seq_tf_matrix(np.hstack((imfs[i], [0])), n=lag + 1) # 给imfs[i]加上一个值作为最后一项的真实值,只占个位子 x = d[:, :-1] if ignore: x = x[:, :-ignore] # 忽略与预测值最近的ignore项 y = d[:, -1] if pr is None: pr = ann(x, y, test_num=1, batch_size=batch_size, hidden=hidden, epochs=epochs) # 预测的值,子序列预测结果 else: pr = pr + ann(x, y, test_num=1, batch_size=batch_size, hidden=hidden, epochs=epochs) # 预测的值,子序列结果直接相加 result.append(pr[0]) end_time = time.time() pr = np.array(result) restore_value = restore_data(pr, data[-test_num - 1:-1]) # 还原预测值 mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:]) # 保存第i次的结果 pre_data_tf_result[str(j + 1) + '_times_lag' + str(lag)] = pr real_result[str(j + 1) + '_times_lag' + str(lag)] = restore_value # 保存第i次的评估指标 time_.append((end_time - start_time) / 60) # 分钟 mape.append(mape_) mae.append(mae_) mse.append(mse_) rmse.append(rmse_) # 预测结果 pre_data_tf_result['test_percentage'] = data_tf[ -test_num:] # 把真实的,需要预测的百分比值加入 real_result['test_value'] = data[-test_num:] # 把真实的需要预测的原值加入 pre_data_tf_result.to_csv('../' + ada_result + '/' + name_data + '/data_tf_result/lag_' + str(lag) + '_multi_emd_aann_data_tf_result.csv') real_result.to_csv('../' + ada_result + '/' + name_data + '/real_result/lag_' + str(lag) + '_multi_emd_aann_real_result.csv') # 预测结果评价指标 result_evaluation = { 'lag': lag, 'time': time_, 'mape': mape, 'mae': mae, 'mse': mse, 'rmse': rmse } fw = open( '../' + ada_result + '/' + name_data + '/multi_emd_aann_result_evaluation.json', 'a') fw.write(json.dumps(result_evaluation) + '\n') 
fw.close()
# Script fragment: build a three-component synthetic signal (two FM sinusoids
# plus a Gaussian-amplitude constant-frequency tone), EMD-decompose it, and
# compute reassigned spectrograms of the signal and its first three IMFs.
fmax2 = 1.5 * 1.0 / 4
x2 = fmsin(N, fmin2, fmax2, p, N / 2, fmax2)[0]
f0 = 1.5 * 1.0 / 16
x3 = amgauss(N, N / 2, N / 8) * fmconst(N, f0)[0]
a1 = 1
a2 = 1
a3 = 1
x = np.real(a1 * x1 + a2 * x2 + a3 * x3)
x = x / np.max(np.abs(x))  # normalize to unit peak amplitude
decomposer = EMD(x)
imf = decomposer.decompose()
n_freq_bins = 256
short_window_length = 127
beta = 3 * np.pi
window = kaiser(short_window_length, beta=beta)
_, re_spec_sig, _ = spectrogram(x, t, n_freq_bins, window)
_, re_spec_imf1, _ = spectrogram(imf[0, :], t, n_freq_bins, window)
_, re_spec_imf2, _ = spectrogram(imf[1, :], t, n_freq_bins, window)
_, re_spec_imf3, _ = spectrogram(imf[2, :], t, n_freq_bins, window)
fig = plt.figure()
for i, rspec in enumerate(
        [re_spec_sig, re_spec_imf1, re_spec_imf2, re_spec_imf3]):
    rspec = np.abs(rspec)[:128, :]  # keep the positive-frequency half
import pandas as pd
import matplotlib.pyplot as plt
from pyhht.emd import EMD
from pyhht.utils import get_envelops
from pyhht.visualization import plot_imfs

# In[] load data: three superposed sinusoids, one amplitude-modulated
t = np.arange(0, 1, 0.01)
x = 2 * np.sin(2 * np.pi * 15 * t) + 4 * np.sin(2 * np.pi * 10 * t) * np.sin(
    2 * np.pi * t * 0.1) + np.sin(2 * np.pi * 5 * t)
upper, lower = get_envelops(x)
plt.plot(upper)
plt.plot(lower)
plt.show()

# In[] EMD decompose
decomposer = EMD(x)
imfs = decomposer.decompose()  # Decompose the input signal into IMFs.
plot_imfs(x, imfs, t)
print('%.3f' % decomposer.io())  # index of orthogonality of the IMFs
# BUG FIX: the original called `plot_imfs.show()` — plot_imfs is a plain
# function with no .show attribute (AttributeError); plt.show() was intended.
plt.show()
plt.plot(imfs[1, :].T)

# In[] save IMFs: stack the IMFs and the original signal as columns
arr = np.vstack((imfs, x))
dataframe = pd.DataFrame(arr.T)
def Main():
    # Batch-analyse MIT-BIH ECG records: for each disease-annotated span of
    # each record, cut 5000-sample windows and compare RR/beat statistics of
    # the raw two-lead-summed signal vs. an EMD-denoised version (sum of the
    # first three IMFs); averages go to CSV.
    samplenumber = 5000
    File_Path = './Database/MIT-BIH'
    samp_rating = 360  # MIT-BIH sampling rate (Hz)
    dir_files1=[]
    for (dirpath, dirnames, filenames) in os.walk(File_Path):
        dir_files1 += [os.path.join(File_Path, file[0:-4]) for file in filenames]
    dir_files = list(set(dir_files1))  # de-duplicate record stems (.dat/.hea/.atr share a name)
    dir_files.sort()
    print dir_files
    Read_Files = []
    avg_min_RR_emd = []
    avg_max_RR_emd = []
    avg_avg_RR_emd = []
    avg_ratio_emd = []
    avg_coeff_emd = []
    avg_min_RR_orig = []
    avg_max_RR_orig = []
    avg_avg_RR_orig = []
    avg_ratio_orig = []
    avg_coeff_orig = []
    Diseases = []
    ##### Save the Data
    A = open('./Analysis/MIT-BIH/Analysis_avg_avg_RR.csv','w')
    B = open('./Analysis/MIT-BIH/Analysis_avg_ratio.csv','w')
    C = open('./Analysis/MIT-BIH/Analysis_avg_coeff.csv','w')
    A.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')
    B.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')
    C.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')
    for j in range(0,len(dir_files)):
        try:
            print dir_files[j],
            original_signal,ecgrecord = wfdb.srdsamp(dir_files[j])
            record = wfdb.rdsamp(dir_files[j])
            data_file = dir_files[j][-3:]
            # Per-record disease annotations live in module-level globals
            # named disease_<record> / beats_disease_<record>.
            sig_diseases = globals()['disease_'+str(data_file)]
            for gf in sig_diseases:
                time = globals()['beats_disease_'+str(data_file)][gf]  # NOTE(review): shadows the `time` module
                time_split = time.split(':')
                minutes = time_split[0]
                seconds = time_split[1]
                total_seconds = int(minutes)*60 + int(seconds)
                total_samples = total_seconds * samp_rating
                disease = gf
                print gf,
                initial_start = 0 # per record starting index of each disease of that record
                ECG_signal = original_signal[initial_start:total_samples]
                sig_length = len(ECG_signal)
                print 'original sig length ', len(original_signal),
                print 'cut_signal_length ',sig_length,
                repetition = int(math.floor(sig_length/samplenumber))
                print 'repeat ', repetition,
                sig_start = 0
                count = 0
                for h in range(0,repetition):
                    signal = []
                    for i in range(sig_start,sig_start+samplenumber):
                        signal.append(ECG_signal[i][0]+ECG_signal[i][1])  # sum the two leads
                    try:
                        RR_orig,RR_time_orig,min_RR_orig,max_RR_orig,Average_RR_orig,Ratio_orig,Individual_coeff_orig,Avg_coeff_orig, Avg_template_orig, Individual_Beats_orig = ECG_analysis(signal[0:samplenumber],show=False,sampling_rate=samp_rating)
                        #Read_Files.append(dir_files[j])
                        #EMD Analysis
                        signal_for_EMD = np.asarray(signal[0:samplenumber])
                        decomposer = EMD(signal_for_EMD,n_imfs=3,maxiter=3000)
                        imfs = decomposer.decompose()
                        EMD_data = []
                        for i in range(0,samplenumber):
                            EMD_data.append(imfs[0][i]+imfs[1][i]+imfs[2][i])
                        RR_emd,RR_time_emd,min_RR_emd,max_RR_emd,Average_RR_emd,Ratio_emd,Individual_coeff_emd,Avg_coeff_emd,Avg_template_emd, Individual_Beats_emd = ECG_analysis(EMD_data[0:samplenumber],show=False,sampling_rate=samp_rating)
                        # Print
                        #print min_RR_emd, ',', min_RR_orig
                        #print max_RR_emd,',',max_RR_orig
                        #print 'AVG_RR_emd=',Average_RR_emd,' Avg_RR_orig=' ,Average_RR_orig,
                        #print Ratio_emd,',',Ratio_orig
                        print 'Emd_coeff=',Avg_coeff_emd,' Orig_coeff=',Avg_coeff_orig,
                        print 'start=',sig_start,' count=',count
                        '''
                        avg_min_RR_emd.append(min_RR_emd)
                        avg_max_RR_emd.append(max_RR_emd)
                        avg_avg_RR_emd.append(Average_RR_emd)
                        avg_ratio_emd.append(Ratio_emd)
                        avg_coeff_emd.append(Avg_coeff_emd)
                        avg_min_RR_orig.append(min_RR_orig)
                        avg_max_RR_orig.append(max_RR_orig)
                        avg_avg_RR_orig.append(Average_RR_orig)
                        avg_ratio_orig.append(Ratio_orig)
                        avg_coeff_orig.append(Avg_coeff_orig)
                        '''
                        #Diseases.append(disease)
                        sig_start = sig_start + samplenumber
                        A.write(dir_files[j]+','+str(Average_RR_emd)+','+str(Average_RR_orig)+','+disease+'\n')
                        B.write(dir_files[j]+','+str(Ratio_emd)+','+str(Ratio_orig)+','+disease+'\n')
                        C.write(dir_files[j]+','+str(Avg_coeff_emd)+','+str(Avg_coeff_orig)+','+disease+'\n')
                        count += 1
                    except:
                        # Window failed analysis/decomposition — skip to the next one.
                        sig_start = sig_start + samplenumber
                        print 'Problem in the cut sequencee'
                initial_start = total_samples
        except:
            print 'Problem: ',dir_files[j][-7:]
    # NOTE(review): dangling triple-quote below — it apparently opens a
    # commented-out region continuing beyond this block in the original file.
    '''
def EMD_data_preparation(filepath,patient_data,csv_folder,problem_data_file,samplenumber,number_of_IMFs,split_perc):
    # EMD-decompose PTB ECG records (15 leads summed) into IMFs and append
    # per-IMF train/test CSV rows, splitting each record's overlapping
    # windows by split_perc. Timing and failure bookkeeping go to extra files.
    files = glob.glob('./csv_folder/*')
    for f in files:
        os.remove(f)  # clear previous output files
    problem_data=open(problem_data_file,'w')
    #PTB Diagnostic ECG database Disease labels
    miscle=['Stable angina','Palpitation', 'Unstable angina']
    cardiom=['Heart failure (NYHA 4)', 'Heart failure (NYHA 3)', 'Heart failure (NYHA 2)']
    ecg_lead = ['i','ii','iii','avr','avl','avf','v1','v2','v3','v4','v5','v6','vx','vy','vz']
    Sig_Records = {'Bundle branch block': 38092, 'Valvular heart disease': 37647, 'Myocarditis': 39672, 'Healthy control': 37500, 'Dysrhythmia': 39557, 'Myocardial infarction': 38951, 'Cardiomyopathy': 37659}
    unIMFs = open('./Problem_Data/unIMFs.csv','a')
    IMF1_train = open(csv_folder+'IMF1_train.csv', 'a')
    IMF2_train = open(csv_folder+'IMF2_train.csv', 'a')
    IMF1_test = open(csv_folder+'IMF1_test.csv', 'a')
    IMF2_test = open(csv_folder+'IMF2_test.csv', 'a')
    Train_time = open('Train_time.csv','a')
    Test_time = open('Test_time.csv','a')
    # Extra output files only when more IMFs were requested.
    if number_of_IMFs >= 3:
        IMF3_train = open(csv_folder+'IMF3_train.csv', 'a')
        IMF3_test = open(csv_folder+'IMF3_test.csv', 'a')
    if number_of_IMFs >= 4:
        IMF4_train = open(csv_folder+'IMF4_train.csv', 'a')
        IMF4_test = open(csv_folder+'IMF4_test.csv', 'a')
    if number_of_IMFs >= 5:
        IMF5_train = open(csv_folder+'IMF5_train.csv', 'a')
        IMF5_test = open(csv_folder+'IMF5_test.csv', 'a')
    if number_of_IMFs == 6:
        IMF6_train = open(csv_folder+'IMF6_train.csv', 'a')
        IMF6_test = open(csv_folder+'IMF6_test.csv', 'a')
    f = open(patient_data)
    line = f.readline()
    disease_array=[]
    file_count = 0
    while line:
        file_count += 1
        if file_count < 1000:
            # Skip (and echo) the first ~1000 list entries.
            # NOTE(review): the counter is incremented in both branches, so
            # effectively two counts per skipped line — confirm intent.
            line = f.readline()
            file_count += 1
            print line, file_count
        else:
            file_count += 1
            splitted = line.split('/')
            file_name = str(splitted[1][0:8])
            patient_folder = str(splitted[0])
            total_path = filepath+patient_folder+'/'+file_name
            print patient_folder,'---',file_name,
            #print total_path
            try:
                signal,ecgrecord = wfdb.srdsamp(total_path)
                record = wfdb.rdsamp(total_path)
                print ecgrecord['comments'][4][22:],
                signal_length = len(signal)
                #repetition = int(math.floor(signal_length/samplenumber))
                if not ecgrecord['comments'][4][22:] == 'n/a':
                    disease = ecgrecord['comments'][4][22:]
                    # Collapse rare labels into broader classes.
                    if disease in miscle:
                        disease = "Miscellaneous"
                    elif disease in cardiom:
                        disease = "Cardiomyopathy"
                    # Per-disease window stride (samples) — balances class sizes.
                    if disease == 'Myocardial infarction':
                        overlap = 1000
                    elif disease == "Bundle branch block":
                        overlap = 55
                    elif disease == "Cardiomyopathy":
                        overlap = 55
                    elif disease == "Dysrhythmia":
                        overlap = 35
                    elif disease == "Healthy control":
                        overlap = 255
                    elif disease == "Myocarditis":
                        overlap = 15
                    elif disease == "Valvular heart disease":
                        overlap = 15
                    if disease not in disease_array:
                        disease_array.append(disease)
                    samplelength = 0
                    undecomposed = 0
                    sig_start_ov = 0
                    repetition = 0
                    # Count how many overlapping windows fit in the record.
                    while(signal_length-sig_start_ov >= samplenumber):
                        repetition += 1
                        sig_start_ov += overlap
                    stop = int(math.ceil(repetition*split_perc))
                    print 'repetition = ',repetition
                    ########### Trining and Test Data Spliting ######################
                    #Training data prepare
                    for j in range(0,stop):
                        write_signal = []
                        for sample in range(samplelength,samplelength+samplenumber):
                            ecg_signal = 0
                            for i1 in range(0,15):
                                ecg_signal = ecg_signal+signal[sample][i1]  # sum all 15 leads
                            write_signal.append(ecg_signal)
                        EMD_signal = np.asarray(write_signal)
                        try:
                            start_time_train = time.time()
                            decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
                            imfs = decomposer.decompose()
                            #Construct Modified EMD
                            modified_EMD_train = []
                            for q in range(0,samplenumber):
                                modified_EMD_train.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
                            elapsed_time_train = time.time() - start_time_train
                            Train_time.write(total_path+','+disease+','+str(elapsed_time_train)+'\n')
                            #print len(imfs)
                            str1 = str(imfs[0][0])
                            str2 = str(imfs[1][0])
                            # Only write rows when the full IMF count (+ residue) came out.
                            if (len(imfs) == number_of_IMFs+1):
                                for h in range(1,samplenumber):
                                    str1 = str1+','+str(imfs[0][h])
                                    str2 = str2+','+str(imfs[1][h])
                                str1 = str1+','+disease+'\n'
                                str2 = str2+','+disease+'\n'
                                IMF1_train.write(str1)
                                IMF2_train.write(str2)
                                if number_of_IMFs >= 3:
                                    str3 = str(imfs[2][0])
                                    for h in range(1,samplenumber):
                                        str3 = str3+','+str(imfs[2][h])
                                    str3 = str3+','+disease+'\n'
                                    IMF3_train.write(str3)
                                if number_of_IMFs >= 4:
                                    str4 = str(imfs[3][0])
                                    for h in range(1,samplenumber):
                                        str4 = str4+','+str(imfs[3][h])
                                    str4 = str4+','+disease+'\n'
                                    IMF4_train.write(str4)
                                if number_of_IMFs >= 5:
                                    str5 = str(imfs[4][0])
                                    for h in range(1,samplenumber):
                                        str5 = str5+','+str(imfs[4][h])
                                    str5 = str5+','+disease+'\n'
                                    IMF5_train.write(str5)
                                if number_of_IMFs==6:
                                    str6 = str(imfs[5][0])
                                    for h in range(1,samplenumber):
                                        str6 = str6+','+str(imfs[5][h])
                                    str6 = str6+','+disease+'\n'
                                    IMF6_train.write(str6)
                            else:
                                print ('IMF Number do not match')
                                undecomposed = undecomposed + 1
                            samplelength = samplelength+overlap
                        except:
                            print 'Could not be decomposed'
                            samplelength = samplelength+overlap
                    #Testing data preparation
                    for j in range(stop,repetition):
                        write_signal = []
                        for sample in range(samplelength,samplelength+samplenumber):
                            ecg_signal = 0
                            for i1 in range(0,15):
                                ecg_signal = ecg_signal+signal[sample][i1]
                            write_signal.append(ecg_signal)
                        EMD_signal = np.asarray(write_signal)
                        try:
                            start_time_test = time.time()
                            decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
                            imfs = decomposer.decompose()
                            #Construct Modified EMD
                            modified_EMD_test = []
                            for q in range(0,samplenumber):
                                modified_EMD_test.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
                            elapsed_time_test = time.time() - start_time_test
                            Test_time.write(total_path+','+disease+','+str(elapsed_time_test)+'\n')
                            #print len(imfs)
                            str1 = str(imfs[0][0])
                            str2 = str(imfs[1][0])
                            if (len(imfs) == number_of_IMFs+1):
                                for h in range(1,samplenumber):
                                    str1 = str1+','+str(imfs[0][h])
                                    str2 = str2+','+str(imfs[1][h])
                                str1 = str1+','+disease+'\n'
                                str2 = str2+','+disease+'\n'
                                IMF1_test.write(str1)
                                IMF2_test.write(str2)
                                if number_of_IMFs >= 3:
                                    str3 = str(imfs[2][0])
                                    for h in range(1,samplenumber):
                                        str3 = str3+','+str(imfs[2][h])
                                    str3 = str3+','+disease+'\n'
                                    IMF3_test.write(str3)
                                if number_of_IMFs >= 4:
                                    str4 = str(imfs[3][0])
                                    for h in range(1,samplenumber):
                                        str4 = str4+','+str(imfs[3][h])
                                    str4 = str4+','+disease+'\n'
                                    IMF4_test.write(str4)
                                if number_of_IMFs >= 5:
                                    str5 = str(imfs[4][0])
                                    for h in range(1,samplenumber):
                                        str5 = str5+','+str(imfs[4][h])
                                    str5 = str5+','+disease+'\n'
                                    IMF5_test.write(str5)
                                if number_of_IMFs==6:
                                    str6 = str(imfs[5][0])
                                    for h in range(1,samplenumber):
                                        str6 = str6+','+str(imfs[5][h])
                                    str6 = str6+','+disease+'\n'
                                    IMF6_test.write(str6)
                            else:
                                print ('IMF Number do not match')
                                undecomposed = undecomposed + 1
                            samplelength = samplelength+overlap
                        except:
                            print 'Could not be decomposed'
                            samplelength = samplelength+overlap
                # Record how many windows failed to decompose for this file.
                string = patient_folder+'---'+file_name+'UNIMFed Records = '+str(undecomposed)+'\n'
                unIMFs.write(string)
                line = f.readline()
            except:
                problem=patient_folder+'/'+file_name+'\n'
                problem_data.write(problem)
                line = f.readline()
                print sys.exc_info(),'\n'
    f.close()
    problem_data.close()
    print disease_array
    IMF1_train.close()
    IMF2_train.close()
    IMF1_test.close()
    IMF2_test.close()
    if number_of_IMFs>=3:
        IMF3_train.close()
        IMF3_test.close()
    if number_of_IMFs>=4:
        IMF4_train.close()
        IMF4_test.close()
    if number_of_IMFs>=5:
        IMF5_train.close()
        IMF5_test.close()
    if number_of_IMFs==6:
        IMF6_train.close()
        IMF6_test.close()
    unIMFs.close()
def corrcoef_imfs(seq):
    """Correlation coefficient of each IMF of *seq* against *seq* itself,
    with the mean absolute correlation appended as the last element."""
    imfs = EMD(seq).decompose()
    coeffs = [corrcoef(imf, seq) for imf in imfs]
    coeffs.append(sum(map(abs, coeffs)) / len(coeffs))
    return coeffs
def hht_marginal_spectrum(self, dataset, params):
    """Compute (or load from cache) Hilbert marginal spectra for the bearings
    listed in params['bearings'].

    For each unprocessed bearing: EMD-decompose every file's vibration signal,
    Hilbert-transform the IMFs, bin the instantaneous frequencies, and
    accumulate envelope amplitudes per bin into a marginal spectrum.

    Returns dict: str(bearing) -> [freq_bins, marginal_spectrum_per_file,
    binned_envelopes_per_file].
    """
    # Setting data_path and checking if it's needed to compute this function for more bearings.
    processed_data_path = 'hht_marginal_spectrum/hht_marginal_spectrum'
    bearings_marginal_spectrum = dataset.load_processed_data(
        dataset, processed_data_path)
    bearings_not_processed = params['bearings']
    if bearings_marginal_spectrum[0]:
        # Cache hit: only compute bearings missing from the cached dict.
        bearings_marginal_spectrum = bearings_marginal_spectrum[1]
        bearings_processed = list(
            map(int, list(bearings_marginal_spectrum.keys())))
        bearings_not_processed = [
            x for x in params['bearings'] if x not in bearings_processed
        ]
        if bearings_not_processed == []:
            return bearings_marginal_spectrum
    # If can't find any saved file.
    else:
        bearings_marginal_spectrum = {}
    for current_bearing in bearings_not_processed:
        imfs_files = []
        bearing_marginal_spectrum = []
        bearing_files = dataset.bearings_files[str(current_bearing)]
        # Calculating IMFs for each data file.
        for bearing_file in bearing_files:
            data = bearing_file[params['vibration_signal']].values
            decomposer = EMD(data)
            imfs_files.append(decomposer.decompose())
        # Getting the frequency bins.
        # NOTE(review): N comes from the *last* file's length — assumes all
        # files have equal length; confirm against the dataset.
        N = len(data)
        fs = params['sampling_frequency']
        freq_bins_step = fs / N
        freq_bins = np.fft.fftfreq(N)[0:N // 2] * fs
        # Timestep = 1.
        # Calculating Hilbert transform for each IMF.
        imfs_ht_files = []
        for imfs_file in imfs_files:
            imfs_ht_files.append(hilbert(imfs_file))
        # Calculating instantaneous frequency of each data.
        imfs_freqs_files = []
        for imfs_ht_file in imfs_ht_files:
            imfs_freqs_file = []
            for imf_ht_file in imfs_ht_file:
                imfs_freqs_file.append(
                    pyhht.utils.inst_freq(imf_ht_file)[0] * fs
                )  # [0] to select the frequencies. * fs because the inst_freq return normalized freqs.
            imfs_freqs_files.append(imfs_freqs_file)
        # Calculating absolute value and scaling by 1/N factor.
        N = len(imfs_ht_file[0])
        imfs_envelope_files = np.abs(imfs_ht_files) / N
        # Putting frequencies into the frequency bins and computing Hilbert Marginal Spectrum.
        imfs_envelope_files_bins = []
        for imfs_freqs_file, imfs_envelope_file in zip(
                imfs_freqs_files, imfs_envelope_files):
            imfs_envelope_file_bins = []
            for imf_freqs_file, imf_envelope_file in zip(
                    imfs_freqs_file, imfs_envelope_file):
                imfs_envelope_file_ = np.zeros(N // 2)
                bin_index = [
                    int(freq // freq_bins_step) for freq in imf_freqs_file
                ]
                for index, abs_val in zip(bin_index, imf_envelope_file):
                    imfs_envelope_file_[index] += abs_val
                imfs_envelope_file_bins.append(imfs_envelope_file_)
            imfs_envelope_files_bins.append(imfs_envelope_file_bins)
        # Summing Hilbert Marginal Spectrum of [0 : params['imfs_qty]] imfs.
        for imfs_envelope_file_bins in imfs_envelope_files_bins:
            bearing_marginal_spectrum.append([
                sum(x) for x in zip(
                    *imfs_envelope_file_bins[0:params['imfs_qty']])
            ])
        # Saving frequencies, marginal spectrum and hilbert spectrum.
        bearings_marginal_spectrum[str(current_bearing)] = [
            freq_bins, bearing_marginal_spectrum, imfs_envelope_files_bins
        ]
    dataset.save_processed_data(bearings_marginal_spectrum,
                                processed_data_path)
    return bearings_marginal_spectrum
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):
    # Plot original vs. EMD-"modified" (sum of the first three IMFs) ECG
    # windows for the records listed in patient_data (12-lead signals summed).
    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    ecg_lead = [
        'i', 'ii', 'iii', 'avr', 'avl', 'avf', 'v1', 'v2', 'v3', 'v4', 'v5',
        'v6', 'vx', 'vy', 'vz'
    ]
    f = open(patient_data)
    line = f.readline()
    disease_array = []
    while line:
        #splitted = line.split('/')
        #file_name = str(splitted[1][0:8])
        file_name = line[0:-1]  # strip the trailing newline
        #patient_folder = str(splitted[0])
        total_path = filepath + file_name
        print total_path
        #try:
        signal, ecgrecord = wfdb.rdsamp(total_path)
        record = wfdb.rdsamp(total_path)
        #print ecgrecord['comments'][4][22:]
        signal_length = len(signal)
        repetition = int(math.floor(signal_length / samplenumber))
        samplelength = 0
        undecomposed = 0
        stop = int(math.ceil(repetition * 0.7))  # 70% of windows used here
        ########### Trining and Test Data Spliting ######################
        #Training data prepare
        for j in range(0, stop):
            write_signal = []
            for sample in range(samplelength, samplelength + samplenumber):
                ecg_signal = 0
                for i1 in range(0, 12):
                    ecg_signal = ecg_signal + signal[sample][i1]  # sum the 12 leads
                write_signal.append(ecg_signal)
            EMD_signal = np.asarray(write_signal)
            #try:
            decomposer = EMD(EMD_signal, n_imfs=number_of_IMFs, maxiter=3000)
            imfs = decomposer.decompose()
            #print len(imfs)
            modified_EMD = []
            for h in range(0, samplenumber):
                modified_EMD.append(imfs[0][h] + imfs[1][h] + imfs[2][h])
            ### Plot data
            fig = plt.figure(figsize=(25, 15))
            plt.subplot(2, 1, 1)
            plt.plot(EMD_signal)
            plt.ylabel('Original Signal\n Amplitude', labelpad=15, fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.subplot(2, 1, 2)
            plt.plot(modified_EMD)
            plt.ylabel('Modified Signal \n Amplitude', labelpad=15, fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.xlabel('Sample Number', fontsize=35)
            fig.tight_layout()
            plt.savefig('modified_ECG_Petersburg.eps', format='eps', dpi=6000)
            plt.show()
            samplelength = samplelength + samplenumber
        line = f.readline()
    f.close()
    # NOTE(review): problem_data is never defined in this function — this
    # close() raises NameError when reached.
    problem_data.close()
    print disease_array
# Script fragment: plot normalized trigger energy, then EMD-decompose a
# filtered seismic trace and plot its IMFs; finally re-run the trigger plot on
# another trace.
# print(ene[i])
# print(ener)
enerdb = 10 * np.log10(ener)
ener = ener / max(ener)  # normalize energy to unit peak
plt.plot(time[indxt], ener)
plt.show()
plt.plot(vtime, tmp)
plt.xlim(tmin, tmax)
plt.show()

# In[33]:
npt = len(trNfil[0].data)
emd = EMD(trNfil[0].data)
imfs = emd.decompose()
time = (np.linspace(1, npt, npt)) * dt  # NOTE(review): shadows the time module/array above
plt.rcParams["figure.figsize"] = (30.0, 50.0)
# BUG FIX: pyhht's plot_imfs signature is plot_imfs(signal, imfs, time_samples);
# the original passed (data, time, imfs), swapping the last two arguments.
plot_imfs(trNfil[0].data, imfs, time)

# In[44]:
aa = trNfil[2].copy()
plotTrigger(aa, cft, 2.2, 0.5)
dm = len(cft)
item = [i for i in list(range(dm)) if cft[i] > 2.2]
print(min(item))
ene = [0] * dm
# --- script fragment: EMD + ELM forecasting of gold settle prices.
# The inner loop is cut off at the end of this chunk.
# NOTE(review): pd and np are used but not imported in this fragment;
# presumably imported earlier in the file. ---
import matplotlib
matplotlib.use("TkAgg")
from pyhht.emd import EMD
from pyhht.visualization import plot_imfs
import matplotlib.pyplot as plt
from ELM import HiddenLayer
data = pd.read_csv('gold_data.csv', usecols=['settle'])
x = data['settle']  # raw input series
y = x  # raw output series
# NOTE(review): Series.as_matrix was removed in pandas 1.0 — .to_numpy()
# is the modern equivalent.
x = x.as_matrix(columns=None)  # input as a matrix
y = y.as_matrix(columns=None)  # output as a matrix
X = x  # raw data reserved for the ELM
decomposer = EMD(x)
imfs = decomposer.decompose()  # EMD decomposition
p_days = 1  # forecast horizon: predict the value p_days ahead
p = 6  # use the previous p days as predictors
C = 10**8  # regularisation factor
t = 300  # train/test split point in time
for i in range(6):  # one model per IMF component
    x_imfs = imfs[i]
    y_imfs = x  # target after decomposition
    num_data = x.shape[0]
    # Drop the first p (+ horizon) targets so x/y windows align.
    y_in = y_imfs[p + p_days - 1:num_data]
    x_in = np.zeros(shape=(1, p))  # seed row for the p-column input matrix
    # Re-shape the series into overlapping length-p windows; the -p_days
    # offset leaves room for the day being predicted.
    for j in range(num_data - p + 1 - p_days):
        x_temp = x_imfs[j:j + p]  # one length-p window (fragment ends here)
# --- fragment: per-part EMD + Hilbert instantaneous frequency.
# partCount, partLen, sig and sampRate are defined outside this chunk;
# the inner per-IMF loop is cut off at the end. ---
for i in range(partCount):
    startIndex = i * partLen
    endIndex = (i + 1) * partLen
    # temporarily adding neighbor parts for more accurate calculations
    # todo - hh : only half or quarter of neighbor parts can be enough?
    if i > 0:  # if not first part
        startIndex -= partLen
    if i < partCount - 2:  # until second from last part
        endIndex += partLen
    if i == partCount - 2:  # second from last part (last part's len may not be partLen)
        endIndex += len(sig) % partLen
    part = sig[startIndex:endIndex]
    # calculate imfs for the part
    decomposer = EMD(part)
    imfsPart = decomposer.decompose()[:-1]  # last element is residue
    # calculate instant frequency for each imf of the part
    instfPart = []
    magPart = []
    truncatedImfs = []
    for imf in imfsPart:
        # Analytic signal via Hilbert transform; sp presumably is
        # scipy.signal — TODO confirm against the file's imports.
        hx = sp.hilbert(imf)
        mag = np.abs(hx)
        # Unwrapped instantaneous phase.
        phx = np.unwrap(np.arctan2(hx.imag, hx.real))
        # Instantaneous frequency in Hz from the phase derivative.
        tempInstf = sampRate / (2 * np.pi) * np.diff(phx)
        # removing neighbor parts after calculations
        if i > 0:  # not first part
            tempInstf = tempInstf[partLen:]
            mag = mag[partLen:]
def test_decomposition(self):
    """Test the decompose method of the emd class.

    Checks that the IMF matrix returned for a 3-component signal has
    shape containing the signal length and 3, in either order
    (imfs.shape is (n_imfs, n_samples)).
    """
    signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
    decomposer = EMD(signal, t=self.ts)
    imfs = decomposer.decompose()
    # assertItemsEqual is Python-2-only unittest API; assertCountEqual is
    # its Python 3 replacement with identical (order-insensitive) semantics.
    self.assertCountEqual(imfs.shape, (signal.shape[0], 3))
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):
    """EMD-decompose ECG segments from PTB-style records and plot the signal,
    its IMFs and the residue (8 stacked subplots, saved as 'PTB_EMD.eps').

    Parameters
    ----------
    filepath : str
        Root directory of the database; record paths are
        ``filepath + patient_folder + '/' + file_name``.
    patient_data : str
        Text file with one ``patient_folder/record_name`` entry per line.
    samplenumber : int
        Number of samples per analysed segment.
    number_of_IMFs : int
        Number of IMFs requested from EMD; decomposition is accepted only
        when it yields exactly ``number_of_IMFs + 1`` components
        (IMFs + residue).

    Side effects: shows matplotlib figures, overwrites 'PTB_EMD.eps',
    prints progress, and closes the module-level ``problem_data`` file.
    """
    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    disease_array = []
    # 'with' guarantees the list file is closed even when a record fails.
    with open(patient_data) as f:
        for line in f:
            splitted = line.split('/')
            file_name = str(splitted[1][0:8])
            patient_folder = str(splitted[0])
            total_path = filepath + patient_folder + '/' + file_name
            print(patient_folder, '---', file_name, end=' ')
            try:
                # rdsamp returns (signal_matrix, header_dict); the original
                # second, unused rdsamp call is dropped.
                signal, ecgrecord = wfdb.rdsamp(total_path)
                # Diagnosis string lives at a fixed offset in comment line 4.
                disease = ecgrecord['comments'][4][22:]
                print(disease)
                repetition = int(math.floor(len(signal) / samplenumber))
                if disease != 'n/a':
                    # Collapse rare classes into umbrella categories.
                    if disease in miscle:
                        disease = "Miscellaneous"
                    elif disease in cardiom:
                        disease = "Cardiomyopathy"
                    if disease not in disease_array:
                        disease_array.append(disease)
                    samplelength = 0
                    undecomposed = 0
                    # Only the first ~70% of segments (training split).
                    stop = int(math.ceil(repetition * 0.7))
                    for _ in range(stop):
                        # Sum the first 15 leads sample-by-sample.
                        write_signal = [
                            sum(signal[sample][i1] for i1 in range(15))
                            for sample in range(samplelength,
                                                samplelength + samplenumber)
                        ]
                        EMD_signal = np.asarray(write_signal)
                        try:
                            decomposer = EMD(EMD_signal,
                                             n_imfs=number_of_IMFs,
                                             maxiter=3000)
                            imfs = decomposer.decompose()
                            if len(imfs) == number_of_IMFs + 1:
                                # First samplenumber points of each IMF; up
                                # to six rows are plotted, missing ones stay
                                # empty (matches the original str1..str6).
                                comps = [[] for _ in range(6)]
                                for k in range(min(number_of_IMFs, 6)):
                                    comps[k] = [imfs[k][h]
                                                for h in range(samplenumber)]
                                # BUGFIX: the residue is the LAST component,
                                # not always imfs[6]; the hard-coded index
                                # raised IndexError for number_of_IMFs < 6
                                # and was mislabelled "Could not be
                                # decomposed" by the broad except below.
                                res = [imfs[number_of_IMFs][h]
                                       for h in range(samplenumber)]
                                # --- 8 stacked subplots: signal, IMF1..6,
                                # residual ---
                                fig = plt.figure(figsize=(25, 15))
                                plt.subplot(8, 1, 1)
                                plt.plot(EMD_signal)
                                plt.ylabel('Signal', rotation=0,
                                           horizontalalignment='right',
                                           fontsize=25)
                                plt.xticks(fontsize=25)
                                plt.yticks(fontsize=25)
                                for k in range(6):
                                    plt.subplot(8, 1, k + 2)
                                    plt.plot(comps[k])
                                    plt.ylabel('IMF%d' % (k + 1), rotation=0,
                                               horizontalalignment='right',
                                               fontsize=25)
                                    plt.xticks(fontsize=25)
                                    plt.yticks(fontsize=25)
                                plt.subplot(8, 1, 8)
                                plt.plot(res)
                                plt.ylabel('Residual', rotation=0,
                                           horizontalalignment='right',
                                           fontsize=25)
                                plt.xlabel('Sample Number', fontsize=30)
                                plt.xticks(fontsize=25)
                                plt.yticks(fontsize=25)
                                fig.tight_layout()
                                plt.savefig('PTB_EMD.eps', format='eps',
                                            dpi=6000)
                                plt.show()
                            else:
                                print('IMF Number do not match')
                                undecomposed = undecomposed + 1
                        except Exception:
                            # Best-effort per segment: skip segments EMD
                            # cannot decompose (mirrors the original bare
                            # except, but no longer traps SystemExit /
                            # KeyboardInterrupt).
                            print('Could not be decomposed')
                        samplelength = samplelength + samplenumber
            except Exception:
                # Record-level failure (missing file, bad header, ...).
                problem = patient_folder + '/' + file_name + '\n'
                # NOTE(review): 'problem' is built but never written;
                # presumably problem_data.write(problem) was intended —
                # left unchanged to preserve behaviour. TODO confirm.
                print(sys.exc_info(), '\n')
    problem_data.close()
    print(disease_array)
# --- fragment of an interactive (Python 2) menu loop: the opening 'if' and
# the enclosing loop that 'continue' refers to are outside this chunk, so
# the top-level 'else:' below has no visible anchor. ---
plt.show()
else:
    # No decomposition has been run yet for this dataset.
    print(colored("You have to do at least one EMD first.", 'red'))
    continue
if name not in windset.keys():
    print(colored('This dataset is not exist', 'red'))
    continue
# Optionally trim leading/trailing zero samples before decomposing.
cut = raw_input('Cut the zeros head and end?[y/n]')
if cut == 'y':
    # Indices of the first and last non-zero samples.
    cutindex = [
        np.nonzero(windset[name])[0][0],
        np.nonzero(windset[name])[0][-1]
    ]
    realwindset = windset[name][cutindex[0]:cutindex[1] + 1]
else:
    realwindset = windset[name]
# 1-based x axis for plotting.
x = np.linspace(1, len(realwindset), len(realwindset))
decomposer = EMD(realwindset)
imfs = decomposer.decompose()
size = imfs.shape  # (n_imfs, n_samples)
plt.figure()
plt.plot(x, realwindset)
plt.title(name)
plt.show()
# One stacked subplot per IMF.
plt.figure(figsize=(20, 18))
for loop in range(1, size[0] + 1):
    plt.subplot(size[0], 1, loop)
    plt.plot(x, imfs[loop - 1])
    plt.title(loop)
plt.show()
def theta(seq):
    """Relative RMS reconstruction error of the EMD of *seq*.

    Decomposes *seq* into IMFs, combines the per-IMF RMS values in
    quadrature, and returns the absolute relative deviation from the RMS
    of the original sequence (0.0 means the decomposition preserves the
    signal's RMS energy exactly).
    """
    decomposer = EMD(seq)
    imfs = decomposer.decompose()
    # Iterate the IMFs directly (no range(len(...)) indexing) and feed
    # sum() a generator instead of materializing a throwaway list.
    rms_imfs = math.sqrt(sum(rms(imf) ** 2 for imf in imfs))
    rms_original = rms(seq)
    return abs(rms_imfs - rms_original) / rms_original