Example No. 1
def load_data_emd(trainNum, testNum, startNum, data):
    print('EMD_data loading.')

    global ahead_num
    # all_data_checked = data

    targetData = data

    # Prepare the target series and split it into training and test sets
    targetData = targetData[startNum + 1: startNum + trainNum + testNum + 1]
    targetData = np.array(targetData).reshape(-1, 1)

    # Normalization: each feature is scaled separately
    global scaler_target
    scaler_target = StandardScaler(copy=True, with_mean=True, with_std=True)
    targetData = scaler_target.fit_transform(targetData)

    decomposer = EMD(targetData)
    imfs = decomposer.decompose()
    # plot_imfs(targetData, imfs)
    data_decomposed = imfs.tolist()

    for h1 in range(len(data_decomposed)):
        data_decomposed[h1] = np.array(data_decomposed[h1]).reshape(-1, 1)
    for h2 in range(len(data_decomposed)):
        trainX, trainY, testX, testY = create_data(data_decomposed[h2], trainNum, ahead_num)
        dataset_imf = [trainX, trainY, testX, testY]
        data_decomposed[h2] = dataset_imf

    print('load_data complete.\n')

    return data_decomposed
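The helper create_data is referenced here but not shown. A minimal sliding-window sketch that matches how it is called (windows of ahead_num lagged values as inputs, the next value as the target, split at trainNum) could look like the following; the exact behaviour of the original helper is an assumption.

import numpy as np

def create_data(series, trainNum, ahead_num):
    # Hypothetical sketch of the sliding-window helper used above.
    series = np.asarray(series).ravel()
    X, Y = [], []
    for i in range(len(series) - ahead_num):
        X.append(series[i:i + ahead_num])   # ahead_num lagged values as features
        Y.append(series[i + ahead_num])     # the following value as the target
    X, Y = np.array(X), np.array(Y)
    return X[:trainNum], Y[:trainNum], X[trainNum:], Y[trainNum:]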
Example No. 2
def emd_plot(data):
    emd = EMD(data)
    imfs = emd.decompose()
    # ipdb.set_trace()
    plot_imfs(data,imfs)
    plt.legend('EMD')
    return imfs
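A self-contained call to emd_plot could look like this; the imports and the synthetic two-tone signal are assumptions added for illustration and are not part of the original snippet.

import numpy as np
import matplotlib.pyplot as plt
from pyhht.emd import EMD
from pyhht.visualization import plot_imfs

t = np.linspace(0, 1, 1000)
signal = np.sin(2 * np.pi * 5 * t) + 0.5 * np.sin(2 * np.pi * 40 * t)
imfs = emd_plot(signal)   # decomposes the signal and plots its IMFs
print(imfs.shape)         # (number of IMFs including the residue, len(signal))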
Example No. 3
 def test_imfs_total_no_error(self):
     """
     Check if the sum of the IMFs is sufficiently close to the input signal.
     """
     signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
     emd = EMD(signal)
     imfs = emd.decompose()
     assert_allclose(imfs.sum(0), signal)
Example No. 5
 def test_residue(self):
     """Test the residue of the emd output."""
     signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
     decomposer = EMD(signal, t=self.ts)
     imfs = decomposer.decompose()
     n_imfs = imfs.shape[0]
     n_maxima = argrelmax(imfs[n_imfs - 1, :])[0].shape[0]
     n_minima = argrelmin(imfs[n_imfs - 1, :])[0].shape[0]
     self.assertTrue(max(n_maxima, n_minima) <= 2)
Example No. 7
 def test_monotonicity_of_trend(self):
     """
     Check if the trend is monotonic.
     """
     signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
     emd = EMD(signal)
     imfs = emd.decompose()
     # There should be two IMFs, and the rest of them are trends
     trend = imfs[2:, :].sum(0)
     assert_allclose(self.trend, trend)
Example No. 8
    def hilbert_huang(self):
        """
        Create EMD and Calculate Hilbert-Huang
        """

        imfs_list = []
        for i in self.reshaped_x:
            for j in i:
                decomposer = EMD(j)
                imfs = decomposer.decompose()
                imfs_list.append(imfs)
        return np.array(imfs_list)
Example No. 9
def get_data(l, lag=3):
    """
    Defaults to 3 lagged terms and a one-step-ahead forecast.
    The last element of l does not take part in the decomposition.
    """
    decomposer = EMD(l[:-1])  # the last element of l does not take part in the decomposition
    imfs = decomposer.decompose()  # m IMFs plus one residual term
    #  Input samples: the first sample has shape (1, lag, m+1), i.e. lag lagged steps, each with m+1 components
    #  [[imf1_1,imf2_1,...,imfm_1,res_1],[imf1_2,imf2_2,...,imfm_2,res_2],...,[imf1_lag,imf2_lag,...,imfm_lag,res_lag]]
    x = seq_tf_matrix(imfs.T, lag)
    #  y is the output: the one-step-ahead value to predict
    y = l[-len(x):]
    return x, y
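seq_tf_matrix is not defined in this snippet. Based on the comments above, it appears to stack lag consecutive rows of the (time, m+1) IMF matrix into one sample; a hedged sketch consistent with that reading:

import numpy as np

def seq_tf_matrix(matrix, n):
    # Hypothetical sketch: windows of n consecutive rows, shape (T - n + 1, n, m + 1).
    # Also works for 1-D input, returning (T - n + 1, n) windows.
    matrix = np.asarray(matrix)
    return np.array([matrix[i:i + n] for i in range(len(matrix) - n + 1)])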
Example No. 10
def EEMD(sample, num_iterations):
    imf = {}
    counts = {}
    for i in range(0, num_iterations):
        white_noise = generateWhiteNoise(len(sample))
        x = white_noise + sample
        decomp = EMD(x, maxiter=10000)
        imfX = decomp.decompose()
        try:
            imf[imfX.shape[0]] += imfX
            counts[imfX.shape[0]] += 1
        except KeyError:
            imf[imfX.shape[0]] = imfX
            counts[imfX.shape[0]] = 1
    # Average each group of IMFs by the number of noise realizations that produced
    # that many IMFs.
    for key in imf:
        imf[key] /= counts[key]
    return imf
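generateWhiteNoise is not shown above. A usage sketch, under the assumption that it returns zero-mean Gaussian noise and that pyhht's EMD is already imported; the noise amplitude and the synthetic sample are illustrative only.

import numpy as np

def generateWhiteNoise(length, scale=0.2):
    # Hypothetical helper: zero-mean Gaussian white noise; the scale is an assumption.
    return np.random.normal(0.0, scale, length)

t = np.linspace(0, 1, 512)
sample = np.sin(2 * np.pi * 6 * t) + 0.3 * np.sin(2 * np.pi * 30 * t)
ensembles = EEMD(sample, num_iterations=20)
for n_imfs, averaged in ensembles.items():
    print(n_imfs, averaged.shape)  # averaged IMF stacks, grouped by the number of IMFs extracted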
def EMD_data_preparation(csv_folder,samplenumber,train_list):
	###########    Training and Test Data Splitting   ######################
	Ensembled_train = open(csv_folder+'Ensembled_train.csv', 'w')
	Total_data = 0
	#Training data prepare
	F = open(train_list,'r')
	line = F.readline()
	while line:
		Original_signal = []
		splitted = line.split(',')
		
		for h in range(0,samplenumber):
			Original_signal.append(float(splitted[h])) 
		disease = splitted[-1][:-1]
		
		Original_signal = np.asarray(Original_signal)
		try:
			decomposer = EMD(Original_signal,n_imfs=3,maxiter=3000)
			imfs = decomposer.decompose()
	
			ensembled_data = []
			for h in range(0,samplenumber):
				ensembled_data.append(imfs[0][h]+imfs[1][h]+imfs[2][h])
	
			Total_data = Total_data+1

			string = str(float("{0:.8f}".format(ensembled_data[0])))

			for h in range(1,samplenumber):
				string = string +','+str(float("{0:.8f}".format(ensembled_data[h])))
			string = string+','+disease+'\n'
			Ensembled_train.write(string)

			print 'Train Data = ', Total_data, '---Disease = ', disease
			line = F.readline()
		
		except:
			print 'Could not Write'
			line = F.readline()
		
	

	Ensembled_train.close()
	#Ensembled_test.close()
	F.close()
Example No. 12
def optIMFPrediction():
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    close_prices = df[5].values[:]
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    svrlist = []
    predYVals = np.matrix([])
    tscv = TimeSeriesSplit(n_splits=500)
    kf = KFold(n_splits=10, shuffle=True)
    for i in range(imf.shape[0]):
        x, y = rollingWindows(imf[i], 500, 0, 3000)
        svr = svm.SVR(cache_size=1000)
        parameters = {
            'C': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]
        }
        reg = GridSearchCV(svr, parameters, cv=kf, n_jobs=-1)
        reg.fit(x, y)
        print(reg.best_params_)
    return
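loadTestData and rollingWindows come from the surrounding module and are not shown. Judging from the calls here and in the next example (windows of a fixed length, targets offset by the window length), a plausible sketch of rollingWindows is the following; treat it as an assumption rather than the original helper.

import numpy as np

def rollingWindows(series, window, start, end):
    # Hypothetical sketch: each sample is `window` consecutive values and the
    # target is the value immediately after the window.
    series = np.asarray(series).ravel()
    X, y = [], []
    for i in range(start, end):
        X.append(series[i:i + window])
        y.append(series[i + window])
    return np.array(X), np.array(y)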
Example No. 13
def testWithIMFPrediction():
    t = time.time()
    df = loadTestData('table_bac.csv')
    plt.plot(df[5].values[:])
    #plt.show()
    close_prices = df[5].values[:]
    print(len(close_prices))
    close_prices = minmax_scale(close_prices)
    emd = EMD(close_prices, maxiter=3000)
    imf = emd.decompose()
    plot_imfs(close_prices, imf)
    plt.plot(hilbert(imf, axis=0).T)
    plt.show()
    svrlist = []
    predYVals = np.matrix([])
    for i in range(7, 8):
        x, y = rollingWindows(imf[i], 500, 0, 2500)
        if i == 7:
            svr = svm.SVR(C=0.1, cache_size=4000)
        else:
            svr = svm.SVR(C=10, cache_size=4000)
        svr.fit(x, y)
        svrlist.append(svr)
        testX, testY = rollingWindows(imf[i], 500, 3040, 3400)
        predY = np.matrix(svr.predict(testX)).T
        print(predY.shape)
        try:
            predYVals = np.concatenate([predYVals, predY], axis=1)
        except ValueError:
            predYVals = np.matrix(predY)
    svr = svm.SVR()
    svr.fit(imf[7:8, 0:3000].T, close_prices[0:3000])
    predPrices = svr.predict(predYVals)
    print(mean_squared_error(close_prices[3540:3900], predPrices))
    print(mean_squared_error(close_prices[3540:3900], close_prices[3539:3899]))
    print(time.time() - t)
Example No. 14
def emd(x, ts, n_imfs):

    x = x.astype('float64')  # cast the input to float64
    imfs = np.zeros((n_imfs + 1, x.shape[0]))
    decomposer = EMD(x)
    allimfs = decomposer.decompose()
    if (len(allimfs) > 0):
        imfs[0, :] = allimfs[0]
    else:
        imfs[0, :] = x - imfs.sum(0)

    if (len(allimfs) > 1):
        imfs[1, :] = allimfs[1]
    else:
        imfs[1, :] = np.zeros((1, x.shape[0]))

    imfs[-1, :] = x - imfs.sum(0)
    '''
    imfs = np.zeros((n_imfs + 1, x.shape[0]))
    for i in range(n_imfs):
        ix = 0
        #print ("x",x.shape)
        #print ("sum",imfs.sum(0).shape)
        mode = x - imfs.sum(0)
        nmin, nmax, nzero = map(len, extr(mode))
        tmin, tmax, xmin, xmax = boundary_conditions(mode, ts)
        #             while abs((nmin + nmax) - nzero) > 1 and (not (utils.judge_stop(mode))):
        #                 mode, amplitude_error = sift(mode, ts)
        #                 if amplitude_error <= tol or (utils.judge_stop(mode)):
        #                     break
        if abs((nmin + nmax) - nzero) > 1 and (not (utils.judge_stop(mode))) and len(tmin)>3 and len(tmax)>3:
            mode, amplitude_error = sift(mode, ts)
        imfs[i, :] = mode
    imfs[-1, :] = x - imfs.sum(0)
    '''
    return imfs
def Main():
    # load raw ECG signal
    #signal, mdata = storage.load_txt('./examples/ecg.txt')

    samplenumber = 5000
    File_Path = './Database/PTB/'
    samp_rating = 1000

    dir_files1 = []
    for (dirpath, dirnames, filenames) in os.walk(File_Path):
        dir_files1 += [os.path.join(dirpath, file[0:-4]) for file in filenames]

    dir_files = list(set(dir_files1))
    print dir_files
    Read_Files = []

    avg_min_RR_emd = []
    avg_max_RR_emd = []
    avg_avg_RR_emd = []
    avg_ratio_emd = []
    avg_coeff_emd = []

    avg_min_RR_orig = []
    avg_max_RR_orig = []
    avg_avg_RR_orig = []
    avg_ratio_orig = []
    avg_coeff_orig = []
    Diseases = []

    ##### Save the Data
    A = open('./Analysis/PTB/Analysis_avg_avg_RR.csv', 'w')
    B = open('./Analysis/PTB/Analysis_avg_ratio.csv', 'w')
    C = open('./Analysis/PTB/Analysis_avg_coeff.csv', 'w')
    A.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')
    B.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')
    C.write('Patient_ID' + ',' + 'EMD' + ',' + 'Original' + '\n')

    for j in range(0, len(dir_files)):
        try:
            print dir_files[j],
            ECG_signal, ecgrecord = wfdb.srdsamp(dir_files[j])
            record = wfdb.rdsamp(dir_files[j])
            sig_length = len(ECG_signal)
            disease = ecgrecord['comments'][4][22:]
            print disease,
            #print record.__dict__

            repetition = int(math.floor(sig_length / samplenumber))
            sig_start = 0
            count = 0
            for h in range(0, repetition):
                signal = []
                for i in range(sig_start, sig_start + samplenumber):
                    sum = 0
                    for channel in range(0, 15):
                        sum += ECG_signal[i][channel]
                    signal.append(sum)
                try:
                    RR_orig, RR_time_orig, min_RR_orig, max_RR_orig, Average_RR_orig, Ratio_orig, Individual_coeff_orig, Avg_coeff_orig, Avg_template_orig, Individual_Beats_orig = ECG_analysis(
                        signal[0:samplenumber],
                        show=False,
                        sampling_rate=samp_rating)
                    #Read_Files.append(dir_files[j])
                    #EMD Analysis
                    signal_for_EMD = np.asarray(signal[0:samplenumber])

                    decomposer = EMD(signal_for_EMD, n_imfs=3, maxiter=2000)
                    imfs = decomposer.decompose()

                    EMD_data = []
                    for i in range(0, samplenumber):
                        EMD_data.append(imfs[0][i] + imfs[1][i] + imfs[2][i])
                    RR_emd, RR_time_emd, min_RR_emd, max_RR_emd, Average_RR_emd, Ratio_emd, Individual_coeff_emd, Avg_coeff_emd, Avg_template_emd, Individual_Beats_emd = ECG_analysis(
                        EMD_data[0:samplenumber],
                        show=False,
                        sampling_rate=samp_rating)

                    # Print
                    #print min_RR_emd, ',', min_RR_orig
                    #print max_RR_emd,',',max_RR_orig
                    #print 'AVG_RR_emd=',Average_RR_emd,' Avg_RR_orig=' ,Average_RR_orig,
                    #print Ratio_emd,',',Ratio_orig
                    print 'Emd_coeff=', Avg_coeff_emd, ' Orig_coeff=', Avg_coeff_orig,
                    print 'start=', sig_start, ' count=', count
                    '''
                    avg_min_RR_emd.append(min_RR_emd)
                    avg_max_RR_emd.append(max_RR_emd)
                    avg_avg_RR_emd.append(Average_RR_emd)
                    avg_ratio_emd.append(Ratio_emd)
                    avg_coeff_emd.append(Avg_coeff_emd)

                    avg_min_RR_orig.append(min_RR_orig)
                    avg_max_RR_orig.append(max_RR_orig)
                    avg_avg_RR_orig.append(Average_RR_orig)
                    avg_ratio_orig.append(Ratio_orig)
                    avg_coeff_orig.append(Avg_coeff_orig)
                    '''

                    #Diseases.append(disease)
                    sig_start = sig_start + samplenumber

                    A.write(dir_files[j] + ',' + str(Average_RR_emd) + ',' +
                            str(Average_RR_orig) + ',' + disease + '\n')
                    B.write(dir_files[j] + ',' + str(Ratio_emd) + ',' +
                            str(Ratio_orig) + ',' + disease + '\n')
                    C.write(dir_files[j] + ',' + str(Avg_coeff_emd) + ',' +
                            str(Avg_coeff_orig) + ',' + disease + '\n')
                    count += 1
                except:
                    sig_start = sig_start + samplenumber
                    print 'Problem in the cut sequence'

        except:
            print 'Problem: ', dir_files[j][-7:]
    '''
Example No. 16
    close_prices = df[5].values[:]
    low_prices = df[4].values[:]
    high_prices = df[3].values[:]
    encodings = np.array([
        longShortEncoding(i, close_prices, high_prices, low_prices, 5, 0.05)
        for i in range(3600)
    ])

    weights = sampleWeightsByUniqueness(encodings)
    print(weights)
    print(encodings[:, 0])
    print(encodings.shape)

    s = minmax_scale(close_prices)
    emd = EMD(s, maxiter=3000)
    imf = emd.decompose()

    plot_imfs(s, imf)

    predYVals = []
    for i in range(7, imf.shape[0]):
        x, y = rollingWindows(imf[i], 30, 0, 3000)
        nn = KNeighborsRegressor(n_neighbors=4)
        nn.fit(x, y)
        x, y = rollingWindows(imf[i], 30, 3030, 3400)
        predYNN = nn.predict(x)
        print(y)
        print(predYNN)
        predYVals.append(predYNN)

    clf2 = KNeighborsClassifier(n_neighbors=2)
"""
Created on Wed Apr 10 17:57:02 2019

@author: Administrator
"""

from pyhht.emd import EMD
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyhht.visualization import plot_imfs

# Read the data
#dataset = pd.read_csv('data_day.csv')
stock_dir='../dataset/AAPL.csv'
dataset = pd.read_csv(open(stock_dir), header=0)
dataset = dataset[::-1]
for col in dataset.columns:
    data = dataset[col].values  # decompose each column of the DataFrame in turn
    s = data.ravel()
    # EMD decomposition
    decomposer = EMD(s)
    IMF = decomposer.decompose()
    print(IMF.shape)
    imf_data = pd.DataFrame(IMF.T)
    imf_data.to_csv('../dataset/emd/emd_AAPL_'+str(col)+'.csv')
    # Plot the decomposition
    plot_imfs(s,IMF)

Example No. 18
 def emd(self):
     Signal = np.array(self.get_waveform(), dtype=float)
     Signal = signal.detrend(Signal, type='constant')
     decomposer = EMD(Signal)
     IMFs = decomposer.decompose()
     return IMFs.tolist()
Example No. 19
def multi_emd_aann(lag=3, num_trial=2, hidden=128, epochs=20, ignore=ignore):
    pre_data_tf_result = pd.DataFrame()  # results in percentage (transformed) form
    real_result = pd.DataFrame()  # reconstructed predicted values
    time_ = []  # elapsed time per trial
    mape, mae, mse, rmse = [], [], [], []
    for j in range(num_trial):
        result = []
        start_time = time.time()
        # test_num (100) test samples
        for k in range(test_num):
            decomposer = EMD(data_tf[:-test_num + k])  # the last element does not take part in the decomposition
            imfs = decomposer.decompose()  # m IMFs plus one residual term
            pr = None
            for i in range(len(imfs)):
                d = seq_tf_matrix(np.hstack((imfs[i], [0])),
                                  n=lag + 1)  # append a placeholder value to imfs[i] so the final true value has a slot
                x = d[:, :-1]
                if ignore:
                    x = x[:, :-ignore]  # drop the ignore terms closest to the prediction target
                y = d[:, -1]
                if pr is None:
                    pr = ann(x,
                             y,
                             test_num=1,
                             batch_size=batch_size,
                             hidden=hidden,
                             epochs=epochs)  # predicted value for this sub-series
                else:
                    pr = pr + ann(x,
                                  y,
                                  test_num=1,
                                  batch_size=batch_size,
                                  hidden=hidden,
                                  epochs=epochs)  # sub-series predictions are summed directly
            result.append(pr[0])
        end_time = time.time()
        pr = np.array(result)
        restore_value = restore_data(pr, data[-test_num - 1:-1])  # restore the predictions to the original scale
        mape_, mae_, mse_, rmse_ = loss_function(restore_value,
                                                 data[-test_num:])
        # save this trial's predictions
        pre_data_tf_result[str(j + 1) + '_times_lag' + str(lag)] = pr
        real_result[str(j + 1) + '_times_lag' + str(lag)] = restore_value
        # save this trial's evaluation metrics
        time_.append((end_time - start_time) / 60)  # minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)
        # prediction results
    pre_data_tf_result['test_percentage'] = data_tf[
        -test_num:]  # append the true percentage values that are to be predicted
    real_result['test_value'] = data[-test_num:]  # append the true original values that are to be predicted
    pre_data_tf_result.to_csv('../' + ada_result + '/' + name_data +
                              '/data_tf_result/lag_' + str(lag) +
                              '_multi_emd_aann_data_tf_result.csv')
    real_result.to_csv('../' + ada_result + '/' + name_data +
                       '/real_result/lag_' + str(lag) +
                       '_multi_emd_aann_real_result.csv')
    # evaluation metrics for the predictions
    result_evaluation = {
        'lag': lag,
        'time': time_,
        'mape': mape,
        'mae': mae,
        'mse': mse,
        'rmse': rmse
    }

    fw = open(
        '../' + ada_result + '/' + name_data +
        '/multi_emd_aann_result_evaluation.json', 'a')
    fw.write(json.dumps(result_evaluation) + '\n')
    fw.close()
Example No. 20
fmax2 = 1.5 * 1.0 / 4
x2 = fmsin(N, fmin2, fmax2, p, N / 2, fmax2)[0]

f0 = 1.5 * 1.0 / 16

x3 = amgauss(N, N / 2, N / 8) * fmconst(N, f0)[0]

a1 = 1
a2 = 1
a3 = 1

x = np.real(a1 * x1 + a2 * x2 + a3 * x3)
x = x / np.max(np.abs(x))

decomposer = EMD(x)
imf = decomposer.decompose()

n_freq_bins = 256
short_window_length = 127
beta = 3 * np.pi
window = kaiser(short_window_length, beta=beta)

_, re_spec_sig, _ = spectrogram(x, t, n_freq_bins, window)
_, re_spec_imf1, _ = spectrogram(imf[0, :], t, n_freq_bins, window)
_, re_spec_imf2, _ = spectrogram(imf[1, :], t, n_freq_bins, window)
_, re_spec_imf3, _ = spectrogram(imf[2, :], t, n_freq_bins, window)

fig = plt.figure()
for i, rspec in enumerate(
    [re_spec_sig, re_spec_imf1, re_spec_imf2, re_spec_imf3]):
    rspec = np.abs(rspec)[:128, :]
Example No. 21
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pyhht.emd import EMD
from pyhht.utils import get_envelops
from pyhht.visualization import plot_imfs
# In[]
# load data
t = np.arange(0, 1, 0.01)
x = 2 * np.sin(2 * np.pi * 15 * t) + 4 * np.sin(2 * np.pi * 10 * t) * np.sin(
    2 * np.pi * t * 0.1) + np.sin(2 * np.pi * 5 * t)

upper, lower = get_envelops(x)
plt.plot(upper)
plt.plot(lower)
plt.show()

# In[] EMD decompose
decomposer = EMD(x)
imfs = decomposer.decompose()  # Decompose the input signal into IMFs.

plot_imfs(x, imfs, t)
print('%.3f' % decomposer.io())

plt.show()

plt.plot(imfs[1, :].T)
# In[] save IMFs
arr = np.vstack((imfs, x))
dataframe = pd.DataFrame(arr.T)
def Main():
    samplenumber = 5000
    File_Path = './Database/MIT-BIH'
    samp_rating = 360

    dir_files1=[]
    for (dirpath, dirnames, filenames) in os.walk(File_Path):
        dir_files1 += [os.path.join(File_Path, file[0:-4]) for file in filenames]

    dir_files = list(set(dir_files1))
    dir_files.sort()
    print dir_files
    Read_Files = []

    avg_min_RR_emd = []
    avg_max_RR_emd = []
    avg_avg_RR_emd = []
    avg_ratio_emd = []
    avg_coeff_emd = []

    avg_min_RR_orig = []
    avg_max_RR_orig = []
    avg_avg_RR_orig = []
    avg_ratio_orig = []
    avg_coeff_orig = []
    Diseases = []

    ##### Save the Data
    A = open('./Analysis/MIT-BIH/Analysis_avg_avg_RR.csv','w')
    B = open('./Analysis/MIT-BIH/Analysis_avg_ratio.csv','w')
    C = open('./Analysis/MIT-BIH/Analysis_avg_coeff.csv','w')
    A.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')
    B.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')
    C.write('Patient_ID'+','+'EMD'+','+'Original'+','+'disease'+'\n')

    for j in range(0,len(dir_files)):
        try:
            print dir_files[j],
            original_signal,ecgrecord = wfdb.srdsamp(dir_files[j])
            record = wfdb.rdsamp(dir_files[j])

            data_file = dir_files[j][-3:]
            sig_diseases = globals()['disease_'+str(data_file)]
            for gf in sig_diseases:
                time = globals()['beats_disease_'+str(data_file)][gf]
                time_split = time.split(':')
                minutes = time_split[0]
                seconds = time_split[1]
                total_seconds = int(minutes)*60 + int(seconds)
                total_samples = total_seconds * samp_rating
                disease = gf
                print gf,

                initial_start = 0 # per record starting index of each disease of that record
                ECG_signal = original_signal[initial_start:total_samples]
                sig_length = len(ECG_signal)
                print 'original sig length ', len(original_signal),
                print 'cut_signal_length ',sig_length,

                repetition = int(math.floor(sig_length/samplenumber))
                print 'repeat ', repetition,
                sig_start = 0
                count = 0
                for h in range(0,repetition):
                    signal = []
                    for i in range(sig_start,sig_start+samplenumber):
                        signal.append(ECG_signal[i][0]+ECG_signal[i][1])
                    try:
                        RR_orig,RR_time_orig,min_RR_orig,max_RR_orig,Average_RR_orig,Ratio_orig,Individual_coeff_orig,Avg_coeff_orig, Avg_template_orig, Individual_Beats_orig = ECG_analysis(signal[0:samplenumber],show=False,sampling_rate=samp_rating)
                        #Read_Files.append(dir_files[j])
                        #EMD Analysis
                        signal_for_EMD = np.asarray(signal[0:samplenumber])

                        decomposer = EMD(signal_for_EMD,n_imfs=3,maxiter=3000)
                        imfs = decomposer.decompose()

                        EMD_data = []
                        for i in range(0,samplenumber):
                            EMD_data.append(imfs[0][i]+imfs[1][i]+imfs[2][i])
                        RR_emd,RR_time_emd,min_RR_emd,max_RR_emd,Average_RR_emd,Ratio_emd,Individual_coeff_emd,Avg_coeff_emd,Avg_template_emd, Individual_Beats_emd = ECG_analysis(EMD_data[0:samplenumber],show=False,sampling_rate=samp_rating)

                        # Print
                        #print min_RR_emd, ',', min_RR_orig
                        #print max_RR_emd,',',max_RR_orig
                        #print 'AVG_RR_emd=',Average_RR_emd,' Avg_RR_orig=' ,Average_RR_orig,
                        #print Ratio_emd,',',Ratio_orig
                        print 'Emd_coeff=',Avg_coeff_emd,' Orig_coeff=',Avg_coeff_orig,
                        print 'start=',sig_start,' count=',count

                        '''
                        avg_min_RR_emd.append(min_RR_emd)
                        avg_max_RR_emd.append(max_RR_emd)
                        avg_avg_RR_emd.append(Average_RR_emd)
                        avg_ratio_emd.append(Ratio_emd)
                        avg_coeff_emd.append(Avg_coeff_emd)

                        avg_min_RR_orig.append(min_RR_orig)
                        avg_max_RR_orig.append(max_RR_orig)
                        avg_avg_RR_orig.append(Average_RR_orig)
                        avg_ratio_orig.append(Ratio_orig)
                        avg_coeff_orig.append(Avg_coeff_orig)
                        '''

                        #Diseases.append(disease)
                        sig_start = sig_start + samplenumber

                        A.write(dir_files[j]+','+str(Average_RR_emd)+','+str(Average_RR_orig)+','+disease+'\n')
                        B.write(dir_files[j]+','+str(Ratio_emd)+','+str(Ratio_orig)+','+disease+'\n')
                        C.write(dir_files[j]+','+str(Avg_coeff_emd)+','+str(Avg_coeff_orig)+','+disease+'\n')
                        count += 1
                    except:
                        sig_start = sig_start + samplenumber
                        print 'Problem in the cut sequence'
                initial_start = total_samples
        except:
            print 'Problem: ',dir_files[j][-7:]


    '''
def EMD_data_preparation(filepath,patient_data,csv_folder,problem_data_file,samplenumber,number_of_IMFs,split_perc):

	files = glob.glob('./csv_folder/*')
	for f in files:
		os.remove(f)
	problem_data=open(problem_data_file,'w')

	#PTB Diagnostic ECG database Disease labels 
	miscle=['Stable angina','Palpitation', 'Unstable angina']
	cardiom=['Heart failure (NYHA 4)', 'Heart failure (NYHA 3)', 'Heart failure (NYHA 2)']
	ecg_lead = ['i','ii','iii','avr','avl','avf','v1','v2','v3','v4','v5','v6','vx','vy','vz']
	Sig_Records = {'Bundle branch block': 38092, 'Valvular heart disease': 37647, 'Myocarditis': 39672, 'Healthy control': 37500, 'Dysrhythmia': 39557, 'Myocardial infarction': 38951, 'Cardiomyopathy': 37659}

	unIMFs = open('./Problem_Data/unIMFs.csv','a')
	IMF1_train = open(csv_folder+'IMF1_train.csv', 'a')
	IMF2_train = open(csv_folder+'IMF2_train.csv', 'a')
	IMF1_test = open(csv_folder+'IMF1_test.csv', 'a')
	IMF2_test = open(csv_folder+'IMF2_test.csv', 'a')
	Train_time = open('Train_time.csv','a')
	Test_time = open('Test_time.csv','a')

	if number_of_IMFs >= 3:
		IMF3_train = open(csv_folder+'IMF3_train.csv', 'a')
		IMF3_test = open(csv_folder+'IMF3_test.csv', 'a')
	if number_of_IMFs >= 4:
		IMF4_train = open(csv_folder+'IMF4_train.csv', 'a')
		IMF4_test = open(csv_folder+'IMF4_test.csv', 'a')
	if number_of_IMFs >= 5:
		IMF5_train = open(csv_folder+'IMF5_train.csv', 'a')
		IMF5_test = open(csv_folder+'IMF5_test.csv', 'a')
	if number_of_IMFs == 6:
		IMF6_train = open(csv_folder+'IMF6_train.csv', 'a')
		IMF6_test = open(csv_folder+'IMF6_test.csv', 'a')


	f = open(patient_data)
	line = f.readline()
	disease_array=[]
	file_count = 0
	while line:
		file_count += 1
		if file_count < 1000:
			line = f.readline()
			file_count += 1
			print line, file_count
		else:
			file_count += 1
			splitted = line.split('/')
			file_name = str(splitted[1][0:8])
			patient_folder = str(splitted[0])

			total_path = filepath+patient_folder+'/'+file_name

			print patient_folder,'---',file_name,
			#print total_path

			try:
				signal,ecgrecord = wfdb.srdsamp(total_path)
				record = wfdb.rdsamp(total_path)
				print ecgrecord['comments'][4][22:],

				signal_length = len(signal)
				#repetition = int(math.floor(signal_length/samplenumber))

				if not ecgrecord['comments'][4][22:] == 'n/a':

					disease = ecgrecord['comments'][4][22:]
					if disease in miscle:
						disease = "Miscellaneous"
					elif disease in cardiom:
						disease = "Cardiomyopathy"

					if disease == 'Myocardial infarction':
						overlap = 1000
					elif disease == "Bundle branch block":
						overlap = 55
					elif disease == "Cardiomyopathy":
						overlap = 55
					elif disease == "Dysrhythmia":
						overlap = 35
					elif disease == "Healthy control":
						overlap = 255
					elif disease == "Myocarditis":
						overlap = 15
					elif disease == "Valvular heart disease":
						overlap = 15

					if disease not in disease_array:
						disease_array.append(disease)

					samplelength = 0
					undecomposed = 0
					sig_start_ov = 0
					repetition = 0
					while(signal_length-sig_start_ov >= samplenumber):
						repetition += 1
						sig_start_ov += overlap
					stop = int(math.ceil(repetition*split_perc))
					print 'repetition = ',repetition
					###########    Training and Test Data Splitting   ######################
					#Training data prepare
					for j in range(0,stop):
						write_signal = []
						for sample in range(samplelength,samplelength+samplenumber):
							ecg_signal = 0
							for i1 in range(0,15):
								ecg_signal = ecg_signal+signal[sample][i1]
							write_signal.append(ecg_signal)

						EMD_signal = np.asarray(write_signal)

						try:
							start_time_train = time.time()
							decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
							imfs = decomposer.decompose()
							#Construct Modified EMD
							modified_EMD_train = []
							for q in range(0,samplenumber):
								modified_EMD_train.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
							elapsed_time_train = time.time() - start_time_train
							Train_time.write(total_path+','+disease+','+str(elapsed_time_train)+'\n')
							#print len(imfs)

							str1 = str(imfs[0][0])
							str2 = str(imfs[1][0])

							if (len(imfs) == number_of_IMFs+1):
								for h in range(1,samplenumber):
									str1 = str1+','+str(imfs[0][h])
									str2 = str2+','+str(imfs[1][h])

								str1 = str1+','+disease+'\n'
								str2 = str2+','+disease+'\n'

								IMF1_train.write(str1)
								IMF2_train.write(str2)

								if number_of_IMFs >= 3:
									str3 = str(imfs[2][0])
									for h in range(1,samplenumber):
										str3 = str3+','+str(imfs[2][h])
									str3 = str3+','+disease+'\n'
									IMF3_train.write(str3)
								if number_of_IMFs >= 4:
									str4 = str(imfs[3][0])
									for h in range(1,samplenumber):
										str4 = str4+','+str(imfs[3][h])
									str4 = str4+','+disease+'\n'
									IMF4_train.write(str4)
								if number_of_IMFs >= 5:
									str5 = str(imfs[4][0])
									for h in range(1,samplenumber):
										str5 = str5+','+str(imfs[4][h])
									str5 = str5+','+disease+'\n'
									IMF5_train.write(str5)
								if number_of_IMFs==6:
									str6 = str(imfs[5][0])
									for h in range(1,samplenumber):
										str6 = str6+','+str(imfs[5][h])
									str6 = str6+','+disease+'\n'
									IMF6_train.write(str6)
							else:
								print ('IMF Number do not match')
								undecomposed = undecomposed + 1

							samplelength = samplelength+overlap

						except:
							print 'Could not be decomposed'
							samplelength = samplelength+overlap

					#Testing data preparation
					for j in range(stop,repetition):
						write_signal = []
						for sample in range(samplelength,samplelength+samplenumber):
							ecg_signal = 0
							for i1 in range(0,15):
								ecg_signal = ecg_signal+signal[sample][i1]
							write_signal.append(ecg_signal)

						EMD_signal = np.asarray(write_signal)

						try:
							start_time_test = time.time()
							decomposer = EMD(EMD_signal,n_imfs=number_of_IMFs,maxiter=3000)
							imfs = decomposer.decompose()
							#Construct Modified EMD
							modified_EMD_test = []
							for q in range(0,samplenumber):
								modified_EMD_test.append(imfs[0][q]+imfs[1][q]+imfs[2][q])
							elapsed_time_test = time.time() - start_time_test
							Test_time.write(total_path+','+disease+','+str(elapsed_time_test)+'\n')
							#print len(imfs)

							str1 = str(imfs[0][0])
							str2 = str(imfs[1][0])

							if (len(imfs) == number_of_IMFs+1):
								for h in range(1,samplenumber):
									str1 = str1+','+str(imfs[0][h])
									str2 = str2+','+str(imfs[1][h])

								str1 = str1+','+disease+'\n'
								str2 = str2+','+disease+'\n'

								IMF1_test.write(str1)
								IMF2_test.write(str2)

								if number_of_IMFs >= 3:
									str3 = str(imfs[2][0])
									for h in range(1,samplenumber):
										str3 = str3+','+str(imfs[2][h])
									str3 = str3+','+disease+'\n'
									IMF3_test.write(str3)
								if number_of_IMFs >= 4:
									str4 = str(imfs[3][0])
									for h in range(1,samplenumber):
										str4 = str4+','+str(imfs[3][h])
									str4 = str4+','+disease+'\n'
									IMF4_test.write(str4)
								if number_of_IMFs >= 5:
									str5 = str(imfs[4][0])
									for h in range(1,samplenumber):
										str5 = str5+','+str(imfs[4][h])
									str5 = str5+','+disease+'\n'
									IMF5_test.write(str5)
								if number_of_IMFs==6:
									str6 = str(imfs[5][0])
									for h in range(1,samplenumber):
										str6 = str6+','+str(imfs[5][h])
									str6 = str6+','+disease+'\n'
									IMF6_test.write(str6)
							else:
								print ('IMF Number do not match')
								undecomposed = undecomposed + 1

							samplelength = samplelength+overlap

						except:
							print 'Could not be decomposed'
							samplelength = samplelength+overlap

				string = patient_folder+'---'+file_name+'UNIMFed Records = '+str(undecomposed)+'\n'
				unIMFs.write(string)
				line = f.readline()

			except:
				problem=patient_folder+'/'+file_name+'\n'
				problem_data.write(problem)
				line = f.readline()
				print sys.exc_info(),'\n'

	f.close()
	problem_data.close()
	print disease_array

	IMF1_train.close()
	IMF2_train.close()
	IMF1_test.close()
	IMF2_test.close()

	if number_of_IMFs>=3:
		IMF3_train.close()
		IMF3_test.close()
	if number_of_IMFs>=4:
		IMF4_train.close()
		IMF4_test.close()
	if number_of_IMFs>=5:
		IMF5_train.close()
		IMF5_test.close()
	if number_of_IMFs==6:
		IMF6_train.close()
		IMF6_test.close()
	unIMFs.close()
Example No. 24
def corrcoef_imfs(seq):
    decomposer = EMD(seq)
    imfs = decomposer.decompose()
    result = [corrcoef(imfs[i], seq) for i in range(len(imfs))]
    result.append(sum(map(abs, result)) / len(result))
    return result
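corrcoef here is presumably a scalar correlation between one IMF and the original sequence. A self-contained check on a synthetic signal; the corrcoef wrapper around np.corrcoef is an assumption, not the original helper.

import numpy as np
from pyhht.emd import EMD

def corrcoef(a, b):
    # Hypothetical scalar wrapper, matching how corrcoef_imfs uses it.
    return np.corrcoef(a, b)[0, 1]

t = np.linspace(0, 1, 1000)
seq = np.sin(2 * np.pi * 8 * t) + 0.4 * np.sin(2 * np.pi * 50 * t)
print(corrcoef_imfs(seq))  # per-IMF correlations with the signal, plus their mean absolute value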
Example No. 25
    def hht_marginal_spectrum(self, dataset, params):

        # Setting data_path and checking if it's needed to compute this function for more bearings.
        processed_data_path = 'hht_marginal_spectrum/hht_marginal_spectrum'
        bearings_marginal_spectrum = dataset.load_processed_data(
            dataset, processed_data_path)
        bearings_not_processed = params['bearings']

        if bearings_marginal_spectrum[0]:
            bearings_marginal_spectrum = bearings_marginal_spectrum[1]

            bearings_processed = list(
                map(int, list(bearings_marginal_spectrum.keys())))
            bearings_not_processed = [
                x for x in params['bearings'] if x not in bearings_processed
            ]

            if bearings_not_processed == []:
                return bearings_marginal_spectrum

        # If can't find any saved file.
        else:
            bearings_marginal_spectrum = {}

        for current_bearing in bearings_not_processed:
            imfs_files = []
            bearing_marginal_spectrum = []
            bearing_files = dataset.bearings_files[str(current_bearing)]

            # Calculating IMFs for each data file.
            for bearing_file in bearing_files:
                data = bearing_file[params['vibration_signal']].values
                decomposer = EMD(data)
                imfs_files.append(decomposer.decompose())

            # Getting the frequency bins.
            N = len(data)
            fs = params['sampling_frequency']
            freq_bins_step = fs / N
            freq_bins = np.fft.fftfreq(N)[0:N // 2] * fs  # Timestep = 1.

            # Calculating Hilbert transform for each IMF.
            imfs_ht_files = []

            for imfs_file in imfs_files:
                imfs_ht_files.append(hilbert(imfs_file))

            # Calculating instantaneous frequency of each data.
            imfs_freqs_files = []
            for imfs_ht_file in imfs_ht_files:
                imfs_freqs_file = []
                for imf_ht_file in imfs_ht_file:
                    imfs_freqs_file.append(
                        pyhht.utils.inst_freq(imf_ht_file)[0] * fs
                    )  # [0] to select the frequencies. * fs because the inst_freq return normalized freqs.
                imfs_freqs_files.append(imfs_freqs_file)

            # Calculating absolute value and scaling by 1/N factor.
            N = len(imfs_ht_file[0])
            imfs_envelope_files = np.abs(imfs_ht_files) / N

            # Putting frequencies into the frequency bins and computing Hilbert Marginal Spectrum.
            imfs_envelope_files_bins = []
            for imfs_freqs_file, imfs_envelope_file in zip(
                    imfs_freqs_files, imfs_envelope_files):
                imfs_envelope_file_bins = []
                for imf_freqs_file, imf_envelope_file in zip(
                        imfs_freqs_file, imfs_envelope_file):
                    imfs_envelope_file_ = np.zeros(N // 2)

                    bin_index = [
                        int(freq // freq_bins_step) for freq in imf_freqs_file
                    ]

                    for index, abs_val in zip(bin_index, imf_envelope_file):
                        imfs_envelope_file_[index] += abs_val

                    imfs_envelope_file_bins.append(imfs_envelope_file_)

                imfs_envelope_files_bins.append(imfs_envelope_file_bins)

            # Summing Hilbert Marginal Spectrum of [0 : params['imfs_qty]] imfs.
            for imfs_envelope_file_bins in imfs_envelope_files_bins:
                bearing_marginal_spectrum.append([
                    sum(x) for x in zip(
                        *imfs_envelope_file_bins[0:params['imfs_qty']])
                ])

            # Saving frequencies, marginal spectrum and hilbert spectrum.
            bearings_marginal_spectrum[str(current_bearing)] = [
                freq_bins, bearing_marginal_spectrum, imfs_envelope_files_bins
            ]

        dataset.save_processed_data(bearings_marginal_spectrum,
                                    processed_data_path)

        return bearings_marginal_spectrum
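The core of the method above, reduced to a single signal, is: decompose, take the analytic signal of each IMF, bin its instantaneous frequency/amplitude pairs into FFT frequency bins, and accumulate. A minimal sketch under the assumption of a synthetic two-tone signal and a 1 kHz sampling frequency:

import numpy as np
from scipy.signal import hilbert
from pyhht.emd import EMD
from pyhht.utils import inst_freq

fs = 1000.0                              # assumed sampling frequency
t = np.arange(0, 1, 1 / fs)
x = np.sin(2 * np.pi * 50 * t) + 0.5 * np.sin(2 * np.pi * 120 * t)

imfs = EMD(x).decompose()
N = len(x)
step = fs / N                            # width of one frequency bin
freq_bins = np.fft.fftfreq(N)[: N // 2] * fs
marginal = np.zeros(N // 2)

for imf in imfs:
    analytic = hilbert(imf)              # analytic signal of the IMF
    freqs = inst_freq(analytic)[0] * fs  # instantaneous frequency in Hz (inst_freq returns normalized values)
    amps = np.abs(analytic) / N          # scaled instantaneous amplitude
    for f, a in zip(freqs, amps):        # zip truncates to the shorter array
        idx = int(np.clip(f // step, 0, N // 2 - 1))
        marginal[idx] += a               # accumulate amplitude into its frequency bin

print(freq_bins[np.argmax(marginal)])    # dominant bin, expected near the 50 Hz component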
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):

    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    ecg_lead = [
        'i', 'ii', 'iii', 'avr', 'avl', 'avf', 'v1', 'v2', 'v3', 'v4', 'v5',
        'v6', 'vx', 'vy', 'vz'
    ]

    f = open(patient_data)
    line = f.readline()
    disease_array = []

    while line:
        #splitted = line.split('/')
        #file_name = str(splitted[1][0:8])
        file_name = line[0:-1]
        #patient_folder = str(splitted[0])

        total_path = filepath + file_name

        print total_path

        #try:
        signal, ecgrecord = wfdb.rdsamp(total_path)
        record = wfdb.rdsamp(total_path)
        #print ecgrecord['comments'][4][22:]

        signal_length = len(signal)
        repetition = int(math.floor(signal_length / samplenumber))

        samplelength = 0
        undecomposed = 0

        stop = int(math.ceil(repetition * 0.7))

        ###########    Training and Test Data Splitting   ######################
        #Training data prepare
        for j in range(0, stop):
            write_signal = []
            for sample in range(samplelength, samplelength + samplenumber):
                ecg_signal = 0
                for i1 in range(0, 12):
                    ecg_signal = ecg_signal + signal[sample][i1]
                write_signal.append(ecg_signal)

            EMD_signal = np.asarray(write_signal)

            #try:
            decomposer = EMD(EMD_signal, n_imfs=number_of_IMFs, maxiter=3000)
            imfs = decomposer.decompose()
            #print len(imfs)

            modified_EMD = []
            for h in range(0, samplenumber):
                modified_EMD.append(imfs[0][h] + imfs[1][h] + imfs[2][h])

            ### Plot data
            fig = plt.figure(figsize=(25, 15))
            plt.subplot(2, 1, 1)
            plt.plot(EMD_signal)
            plt.ylabel('Original Signal\n Amplitude', labelpad=15, fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.subplot(2, 1, 2)
            plt.plot(modified_EMD)
            plt.ylabel('Modified Signal \n Amplitude',
                       labelpad=15,
                       fontsize=35)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=35)
            plt.xlabel('Sample Number', fontsize=35)
            fig.tight_layout()
            plt.savefig('modified_ECG_Petersburg.eps', format='eps', dpi=6000)
            plt.show()

            samplelength = samplelength + samplenumber

        line = f.readline()

    f.close()
    print disease_array
Example No. 27
    # print(ene[i])
# print(ener)
enerdb = 10 * np.log10(ener)
ener = ener / max(ener)
plt.plot(time[indxt], ener)
plt.show()
plt.plot(vtime, tmp)
plt.xlim(tmin, tmax)
plt.show()


# In[33]:

npt = len(trNfil[0].data)
emd = EMD(trNfil[0].data)
imfs = emd.decompose()
time = (np.linspace(1, npt, npt)) * dt
plt.rcParams["figure.figsize"] = (30.0, 50.0)
plot_imfs(trNfil[0].data, imfs, time)


# In[44]:

aa = trNfil[2].copy()
plotTrigger(aa, cft, 2.2, 0.5)
dm = len(cft)
item = [i for i in list(range(dm)) if cft[i] > 2.2]
print(min(item))


ene = [0] * dm
Example No. 28
import matplotlib
matplotlib.use("TkAgg")
import numpy as np
import pandas as pd
from pyhht.emd import EMD
from pyhht.visualization import plot_imfs
import matplotlib.pyplot as plt
from ELM import HiddenLayer

data = pd.read_csv('gold_data.csv', usecols=['settle'])
x = data['settle']  # raw input data
y = x  # raw output data
x = x.to_numpy()  # input array (to_numpy() replaces the removed as_matrix())
y = y.to_numpy()  # output array
X = x  # raw data reserved for the ELM

decomposer = EMD(x)
imfs = decomposer.decompose()  # EMD decomposition

p_days = 1  # predict p_days ahead
p = 6  # use the previous p days for prediction
C = 10**8  # regularization factor
t = 300  # split point in time

for i in range(6):  # loop over the decomposed components
    x_imfs = imfs[i]
    y_imfs = x  # y corresponding to the decomposed series
    num_data = x.shape[0]
    y_in = y_imfs[p + p_days - 1:num_data]  # drop the first p values of y
    x_in = np.zeros(shape=(1, p))  # initialise a 1-by-p input matrix
    for j in range(num_data - p + 1 -
                   p_days):  # group the original x into windows of p values; the -p_days offset leaves the final day to be predicted
        x_temp = x_imfs[j:j + p]  # take p values as one row of the matrix
Example No. 29
for i in range(partCount):
    startIndex = i * partLen
    endIndex = (i + 1) * partLen
    # temporarily adding neighbor parts for more accurate calculations
    # todo - hh : only half or quarter of neighbor parts can be enough?
    if i > 0:  # if not first part
        startIndex -= partLen
    if i < partCount - 2:  # until second from last part
        endIndex += partLen
    if i == partCount - 2:  # second from last part (last part's len may not be partLen)
        endIndex += len(sig) % partLen
    part = sig[startIndex:endIndex]

    # calculate imfs for the part
    decomposer = EMD(part)
    imfsPart = decomposer.decompose()[:-1]  # last element is residue

    # calculate instant frequency for each imf of the part
    instfPart = []
    magPart = []
    truncatedImfs = []
    for imf in imfsPart:
        hx = sp.hilbert(imf)
        mag = np.abs(hx)
        phx = np.unwrap(np.arctan2(hx.imag, hx.real))
        tempInstf = sampRate / (2 * np.pi) * np.diff(phx)

        # removing neighbor parts after calculations
        if i > 0:  # not first part
            tempInstf = tempInstf[partLen:]
            mag = mag[partLen:]
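The phase-differencing step above, isolated on a synthetic chirp so it can be run on its own; the sampling rate and the chirp stand-in for an IMF are assumptions.

import numpy as np
import scipy.signal as sp

sampRate = 500.0                               # assumed sampling rate
t = np.arange(0, 2, 1 / sampRate)
imf = sp.chirp(t, f0=5, t1=2, f1=50)           # stand-in for a single IMF

hx = sp.hilbert(imf)                           # analytic signal
mag = np.abs(hx)                               # instantaneous amplitude (envelope)
phx = np.unwrap(np.arctan2(hx.imag, hx.real))  # unwrapped instantaneous phase
instf = sampRate / (2 * np.pi) * np.diff(phx)  # instantaneous frequency in Hz
print(instf[:3], instf[-3:])                   # sweeps from about 5 Hz up to about 50 Hz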
Example No. 30
 def test_decomposition(self):
     """Test the decompose method of the emd class."""
     signal = np.sum([self.trend, self.mode1, self.mode2], axis=0)
     decomposer = EMD(signal, t=self.ts)
     imfs = decomposer.decompose()
     self.assertItemsEqual(imfs.shape, (signal.shape[0], 3))
def EMD_data_preparation(filepath, patient_data, samplenumber, number_of_IMFs):

    miscle = ['Stable angina', 'Palpitation', 'Unstable angina']
    cardiom = [
        'Heart failure (NYHA 4)', 'Heart failure (NYHA 3)',
        'Heart failure (NYHA 2)'
    ]
    ecg_lead = [
        'i', 'ii', 'iii', 'avr', 'avl', 'avf', 'v1', 'v2', 'v3', 'v4', 'v5',
        'v6', 'vx', 'vy', 'vz'
    ]

    f = open(patient_data)
    line = f.readline()
    disease_array = []

    while line:

        splitted = line.split('/')
        file_name = str(splitted[1][0:8])
        patient_folder = str(splitted[0])

        total_path = filepath + patient_folder + '/' + file_name

        print patient_folder, '---', file_name,
        #print total_path

        try:
            signal, ecgrecord = wfdb.rdsamp(total_path)
            record = wfdb.rdsamp(total_path)
            print ecgrecord['comments'][4][22:]

            signal_length = len(signal)
            repetition = int(math.floor(signal_length / samplenumber))

            if not ecgrecord['comments'][4][22:] == 'n/a':

                disease = ecgrecord['comments'][4][22:]
                if disease in miscle:
                    disease = "Miscellaneous"
                elif disease in cardiom:
                    disease = "Cardiomyopathy"

                if disease not in disease_array:
                    disease_array.append(disease)

                samplelength = 0
                undecomposed = 0

                stop = int(math.ceil(repetition * 0.7))

                ###########    Training and Test Data Splitting   ######################
                #Training data prepare
                for j in range(0, stop):
                    write_signal = []
                    for sample in range(samplelength,
                                        samplelength + samplenumber):
                        ecg_signal = 0
                        for i1 in range(0, 15):
                            ecg_signal = ecg_signal + signal[sample][i1]
                        write_signal.append(ecg_signal)

                    EMD_signal = np.asarray(write_signal)

                    try:
                        decomposer = EMD(EMD_signal,
                                         n_imfs=number_of_IMFs,
                                         maxiter=3000)
                        imfs = decomposer.decompose()
                        #print len(imfs)

                        str1 = []
                        str2 = []
                        str3 = []
                        str4 = []
                        str5 = []
                        str6 = []
                        str1.append(imfs[0][0])
                        str2.append(imfs[1][0])

                        if (len(imfs) == number_of_IMFs + 1):
                            for h in range(1, samplenumber):
                                str1.append(imfs[0][h])
                                str2.append(imfs[1][h])

                            if number_of_IMFs >= 3:
                                str3.append(imfs[2][0])
                                for h in range(1, samplenumber):
                                    str3.append(imfs[2][h])

                            if number_of_IMFs >= 4:
                                str4.append(imfs[3][0])
                                for h in range(1, samplenumber):
                                    str4.append(imfs[3][h])

                            if number_of_IMFs >= 5:
                                str5.append(imfs[4][0])
                                for h in range(1, samplenumber):
                                    str5.append(imfs[4][h])

                            if number_of_IMFs == 6:
                                str6.append(imfs[5][0])
                                for h in range(1, samplenumber):
                                    str6.append(imfs[5][h])
                            res = []
                            res.append(imfs[6][0])
                            for h in range(1, samplenumber):
                                res.append(imfs[6][h])

                            ### Plot data
                            fig = plt.figure(figsize=(25, 15))
                            plt.subplot(8, 1, 1)
                            plt.plot(EMD_signal)
                            plt.ylabel('Signal',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 2)
                            plt.plot(str1)
                            plt.ylabel('IMF1',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 3)
                            plt.plot(str2)
                            plt.ylabel('IMF2',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 4)
                            plt.plot(str3)
                            plt.ylabel('IMF3',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 5)
                            plt.plot(str4)
                            plt.ylabel('IMF4',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 6)
                            plt.plot(str5)
                            plt.ylabel('IMF5',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 7)
                            plt.plot(str6)
                            plt.ylabel('IMF6',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            plt.subplot(8, 1, 8)
                            plt.plot(res)
                            plt.ylabel('Residual',
                                       rotation=0,
                                       horizontalalignment='right',
                                       fontsize=25)
                            plt.xlabel('Sample Number', fontsize=30)
                            plt.xticks(fontsize=25)
                            plt.yticks(fontsize=25)
                            fig.tight_layout()
                            plt.savefig('PTB_EMD.eps', format='eps', dpi=6000)
                            plt.show()
                        else:
                            print('IMF Number do not match')
                            undecomposed = undecomposed + 1

                        samplelength = samplelength + samplenumber

                    except:
                        print 'Could not be decomposed'
                        samplelength = samplelength + samplenumber

            line = f.readline()

        except:
            problem = patient_folder + '/' + file_name + '\n'
            line = f.readline()
            print sys.exc_info(), '\n'

    f.close()
    print disease_array
Example No. 33
            plt.show()
        else:
            print(colored("You have to do at least one EMD first.", 'red'))
        continue
    if name not in windset.keys():
        print(colored('This dataset does not exist', 'red'))
        continue
    cut = raw_input('Cut the zeros head and end?[y/n]')
    if cut == 'y':
        cutindex = [
            np.nonzero(windset[name])[0][0],
            np.nonzero(windset[name])[0][-1]
        ]
        realwindset = windset[name][cutindex[0]:cutindex[1] + 1]
    else:
        realwindset = windset[name]
    x = np.linspace(1, len(realwindset), len(realwindset))
    decomposer = EMD(realwindset)
    imfs = decomposer.decompose()
    size = imfs.shape
    plt.figure()
    plt.plot(x, realwindset)
    plt.title(name)
    plt.show()
    plt.figure(figsize=(20, 18))
    for loop in range(1, size[0] + 1):
        plt.subplot(size[0], 1, loop)
        plt.plot(x, imfs[loop - 1])
        plt.title(loop)
    plt.show()
Example No. 34
def theta(seq):
    decomposer = EMD(seq)
    imfs = decomposer.decompose()
    rms_imfs = sum([rms(imfs[i])**2 for i in range(len(imfs))])
    rms_original = rms(seq)
    return abs(math.sqrt(rms_imfs) - rms_original) / rms_original
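A quick check of theta on a synthetic signal. The imports are assumptions, and rms is taken to be the root-mean-square, which is what the energy comparison in theta implies; small return values indicate that the IMF energies approximately reproduce the signal's RMS.

import math
import numpy as np
from pyhht.emd import EMD

def rms(x):
    # Root-mean-square, as implied by the energy comparison in theta().
    return math.sqrt(np.mean(np.square(x)))

t = np.linspace(0, 1, 1000)
seq = np.sin(2 * np.pi * 10 * t) + 0.5 * np.sin(2 * np.pi * 3 * t)
print(theta(seq))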