def test_ecg_clean():
    """Check nk.ecg_clean against the raw spectrum and the biosppy reference filter."""
    fs = 1000
    raw = nk.ecg_simulate(sampling_rate=fs, noise=0.05)

    cleaned = nk.ecg_clean(raw, sampling_rate=fs, method="neurokit")
    assert raw.size == cleaned.size

    # The neurokit pipeline applies a 0.5 Hz highpass: spectral content
    # below that cutoff must shrink relative to the raw signal.
    freqs = np.fft.rfftfreq(raw.size, 1 / fs)
    raw_spectrum = np.abs(np.fft.rfft(raw))
    cleaned_spectrum = np.abs(np.fft.rfft(cleaned))
    assert np.sum(raw_spectrum[freqs < .5]) > np.sum(cleaned_spectrum[freqs < .5])

    # Comparison to biosppy
    # (https://github.com/PIA-Group/BioSPPy/blob/e65da30f6379852ecb98f8e2e0c9b4b5175416c3/biosppy/signals/ecg.py#L69)
    cleaned_biosppy = nk.ecg_clean(raw, sampling_rate=fs, method="biosppy")
    reference, _, _ = biosppy.tools.filter_signal(signal=raw,
                                                  ftype='FIR',
                                                  band='bandpass',
                                                  order=int(0.3 * fs),
                                                  frequency=[3, 45],
                                                  sampling_rate=fs)
    assert np.allclose((cleaned_biosppy - reference).mean(), 0, atol=1e-6)
def extract_rpeak_features(row, signal):
    """
    Extract the R peak features.

    :param row: a `BaseDataset` row to calculate the features from
    :param signal: the raw ECG signal
    :return: `row` with the added features
    """
    fs = row.Fs
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    peaks, info = nk.ecg_peaks(cleaned, sampling_rate=fs)

    # R-peak positions converted from sample indices to seconds.
    r_peaks_sec = np.where(peaks['ECG_R_Peaks'].to_numpy() == 1)[0].astype(np.float32)
    r_peaks_sec /= fs

    num_peaks = len(r_peaks_sec)
    if num_peaks > 2:
        # HRV indices need more than a couple of beats to be meaningful.
        hrv = nk.hrv(peaks, sampling_rate=fs, show=False).iloc[0]
        row = row.append(hrv)
    row['N_QRS'] = num_peaks

    # RR-interval statistics plus raw-signal statistics.
    rr = np.diff(r_peaks_sec)
    row = row.append(get_statistics(rr, 'RR'))
    row = row.append(get_statistics(signal, 'signal'))
    return row, info
def get_HRVs_values(data, header_data):
    """
    Compute time-domain HRV features plus mean P-peak amplitude for one record.

    :param data: array of lead signals; the lead at index 1 is analysed
    :param header_data: WFDB-style header lines carrying per-lead gains and
        ``#Age`` / ``#Sex`` / ``#Dx`` annotation lines
    :return: one-row DataFrame of ``nk.hrv_time`` features with
        ``mean_P_Peaks``, ``age`` and ``label`` columns appended
    """
    # First header line: "<record> <n_leads> <fs> ..."
    tmp_hea = header_data[0].split(' ')
    ptID = tmp_hea[0]
    num_leads = int(tmp_hea[1])
    sample_Fs = int(tmp_hea[2])

    # Per-lead gain, taken from the "<gain>/<units>" field of each signal line.
    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        tmp_hea = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(tmp_hea[2].split('/')[0])

    # for testing, we included the mean age of 57 if the age is a NaN
    # This value will change as more data is being released
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Sex'):
            tmp_sex = iline.split(': ')[1]
            if tmp_sex.strip() == 'Female':
                sex = 1
            else:
                sex = 0
        elif iline.startswith('#Dx'):
            label = iline.split(': ')[1].split(',')[0]

    signal = data[1]
    gain = gain_lead[1]
    ecg_signal = nk.ecg_clean(signal * gain, sampling_rate=sample_Fs, method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    hrv_time = nk.hrv_time(rpeaks, sampling_rate=sample_Fs)
    # hrv_non = nk.hrv_nonlinear(rpeaks, sampling_rate=sample_Fs)

    # BUG FIX: p_peaks was previously only assigned inside the try block, so a
    # ValueError from ecg_delineate led to a NameError below. Default to "no
    # P peaks found" instead.
    p_peaks = []
    try:
        _, waves_peak = nk.ecg_delineate(ecg_signal, rpeaks, sampling_rate=sample_Fs)
        p_peaks = waves_peak['ECG_P_Peaks']
    except ValueError:
        print('Exception raised!')

    # Drop NaN entries and convert the surviving P-peak indices to ints.
    p_peaks = np.asarray(p_peaks, dtype=float)
    p_peaks = p_peaks[~np.isnan(p_peaks)]
    p_peaks = [int(a) for a in p_peaks]

    # np.mean of an empty list yields NaN, the sensible "unknown" here.
    mean_P_Peaks = np.mean([signal[w] for w in p_peaks])

    hrv_time['mean_P_Peaks'] = mean_P_Peaks
    hrv_time['age'] = age
    hrv_time['label'] = label
    # df = pd.concat([hrv_time, hrv_non], axis=1)
    return hrv_time
def test_ecg_rate():
    """ecg_rate should average ~70 bpm, with and without resampling to a fixed length."""
    fs = 1000
    simulated = nk.ecg_simulate(duration=120, sampling_rate=fs, noise=0.15,
                                random_state=42)
    cleaned = nk.ecg_clean(simulated, sampling_rate=fs, method="neurokit")
    signals, info = nk.ecg_peaks(cleaned, method="neurokit")

    # Without a desired length: one rate value per detected R peak.
    rate = nk.ecg_rate(rpeaks=info, sampling_rate=fs)
    assert rate.shape == (info["ECG_R_Peaks"].size,)
    assert np.allclose(rate.mean(), 70, atol=2)

    # With a desired length: the rate series is resampled to that many samples.
    test_length = 1200
    rate = nk.ecg_rate(rpeaks=info, sampling_rate=fs, desired_length=test_length)
    assert rate.shape == (test_length,)
    assert np.allclose(rate.mean(), 70, atol=2)
def test_ecg_peaks():
    """R-peak detection should find ~139 peaks with or without artifact correction."""
    fs = 1000
    simulated = nk.ecg_simulate(duration=120, sampling_rate=fs, noise=0.15,
                                random_state=42)
    cleaned = nk.ecg_clean(simulated, sampling_rate=fs, method="neurokit")

    # Same expectations whether artifact correction is requested or not.
    for correct in (False, True):
        signals, info = nk.ecg_peaks(cleaned, correct_artifacts=correct,
                                     method="neurokit")
        assert signals.shape == (120000, 1)
        assert np.allclose(signals["ECG_R_Peaks"].values.sum(dtype=np.int64),
                           139, atol=1)
def qrs_detection_pantompkins_vs_neurokit(self, dataset):
    """Save R-peak overlay plots for the Pan–Tompkins and Neurokit detectors on one record."""
    row = dataset.data.iloc[50]
    # First 20 seconds of the record (inclusive endpoint).
    signal = dataset.read_record(row.Record)[:row.Fs * 20 + 1]
    method_names = {'pantompkins': 'Pan–Tompkins', 'neurokit': 'Neurokit'}
    for method, label in method_names.items():
        cleaned = nk.ecg_clean(signal, sampling_rate=row.Fs, method=method)
        peaks, info = nk.ecg_peaks(cleaned, sampling_rate=row.Fs, method=method)
        r_peaks = np.where(peaks['ECG_R_Peaks'].to_numpy() == 1)[0]

        # Raw trace plus detected R peaks as markers, time axis in seconds.
        fig = go.Figure()
        time_axis = np.arange(len(signal)) / row.Fs
        fig.add_trace(go.Scatter(x=time_axis, y=signal))
        fig.add_trace(go.Scatter(mode='markers', x=r_peaks / row.Fs,
                                 y=signal[r_peaks]))
        fig.update_traces(marker=dict(size=8))
        self.set_ecg_layout(
            fig,
            title=f'{row.Record} - R peaks ({label} method)',
            showlegend=False,
            xaxis=dict(range=[0, 20]),
            yaxis=dict(range=[-5000, 5000]))
        self.save_image(fig, f'qrs_{method}.png', width=900, height=300)
def process_X_values(X, Y):
    """
    Clean every lead of every ECG record and flatten each record into one wide row.

    :param X: sequence of (n_samples, 12) ECG arrays, one per record
    :param Y: DataFrame aligned with ``X`` carrying ``patient_id``,
        ``strat_fold`` and ``diagnostic_superclass`` columns
    :return: concatenated wide DataFrame (one row per record) with the label,
        fold and id columns attached; rows with a NaN id are dropped
    """
    dfs = []
    # Encode the superclass label: NORM=0, MI=1, STTC=2, HYP=3, anything else=4.
    superclass_codes = {"NORM": 0, "MI": 1, "STTC": 2, "HYP": 3}
    Y["diagnostic_superclass"] = Y["diagnostic_superclass"].swifter.apply(
        lambda x: superclass_codes.get(x, 4))
    for v in tqdm(range(0, len(X))):
        temp = pd.DataFrame(X[v], columns=[
            'I', 'II', 'III', 'aVL', 'aVR', 'aVF',
            'V1', 'V2', 'V3', 'V4', 'V5', 'V6'
        ])
        for value in temp.columns:
            ecg = np.array(temp[value])
            # BUG FIX: narrowed the bare `except:` so that only real cleaning
            # failures trigger the fallback (not KeyboardInterrupt/SystemExit).
            try:
                signals = nk.ecg_clean(ecg, sampling_rate=250,
                                       method='pantompkins1985')
            except Exception:
                # NOTE(review): the fallback re-cleans assuming a 100 Hz
                # record — confirm this matches the dataset's low-rate variant.
                signals = nk.ecg_clean(ecg, sampling_rate=100,
                                       method='pantompkins1985')
            temp[value] = signals
        # Pivot the time axis into columns: one wide row per patient record.
        temp['id'] = Y.iloc[v].patient_id
        s = temp.groupby('id').cumcount().add(1)
        temp = (temp.set_index(['id', s]).unstack().sort_index(axis=1, level=1))
        temp['diagnostic_superclass'] = Y.iloc[v].diagnostic_superclass
        temp['strat_fold'] = Y.iloc[v].strat_fold
        temp['id'] = Y.iloc[v].patient_id
        dfs.append(temp)
    data = pd.concat(dfs)
    data = data[~np.isnan(data.id)]
    return data
def apply(sampling_rate):
    """Clean the globally shared ECG trace, update the globals and redraw the plot."""
    global x_global, y_global
    time_axis, voltage = x_global, y_global
    window_autofilter.destroy()
    # Replace the raw trace with the neurokit-cleaned version.
    voltage = nk.ecg_clean(voltage, sampling_rate=sampling_rate)
    x_global = time_axis
    y_global = voltage
    a.clear()
    a.plot(time_axis, voltage)
    plt.xlabel('time [s]')
    plt.ylabel('voltage [mV]')
    canvas.draw()
def test_ecg_findpeaks():
    """Pin the R-peak counts of every supported detector on a simulated ECG."""
    fs = 1000
    simple_ecg = nk.ecg_simulate(duration=60, sampling_rate=fs, noise=0,
                                 method="simple", random_state=42)
    cleaned = nk.ecg_clean(simple_ecg, sampling_rate=fs, method="neurokit")

    # neurokit method with the diagnostic plot enabled.
    info_nk = nk.ecg_findpeaks(cleaned, show=True)
    assert info_nk["ECG_R_Peaks"].size == 69
    fig = plt.gcf()  # show=True draws on the current figure
    assert len(fig.axes) == 2

    # Detectors paired with their own matching cleaning method.
    info_pantom = nk.ecg_findpeaks(
        nk.ecg_clean(simple_ecg, method="pantompkins1985"),
        method="pantompkins1985")
    assert info_pantom["ECG_R_Peaks"].size == 70

    info_hamilton = nk.ecg_findpeaks(
        nk.ecg_clean(simple_ecg, method="hamilton2002"),
        method="hamilton2002")
    assert info_hamilton["ECG_R_Peaks"].size == 69

    info_christov = nk.ecg_findpeaks(cleaned, method="christov2004")
    assert info_christov["ECG_R_Peaks"].size == 273

    info_gamboa = nk.ecg_findpeaks(cleaned, method="gamboa2008")
    assert info_gamboa["ECG_R_Peaks"].size == 69

    info_elgendi = nk.ecg_findpeaks(
        nk.ecg_clean(simple_ecg, method="elgendi2010"),
        method="elgendi2010")
    assert info_elgendi["ECG_R_Peaks"].size == 70

    info_engzeemod = nk.ecg_findpeaks(
        nk.ecg_clean(simple_ecg, method="engzeemod2012"),
        method="engzeemod2012")
    assert info_engzeemod["ECG_R_Peaks"].size == 70

    info_kalidas = nk.ecg_findpeaks(
        nk.ecg_clean(simple_ecg, method="kalidas2017"),
        method="kalidas2017")
    assert np.allclose(info_kalidas["ECG_R_Peaks"].size, 68, atol=1)

    # martinez2003 runs on the default (non-"simple") simulated ECG.
    default_ecg = nk.ecg_simulate(duration=60, sampling_rate=fs, noise=0,
                                  random_state=42)
    cleaned_default = nk.ecg_clean(default_ecg, sampling_rate=fs,
                                   method="neurokit")
    info_martinez = nk.ecg_findpeaks(cleaned_default, method="martinez2003")
    assert np.allclose(info_martinez["ECG_R_Peaks"].size, 69, atol=1)
def generate_features(ecg, header):
    """
    Build P-wave / PQ-interval feature dicts for each lead of a 12-lead ECG.

    :param ecg: iterable of per-lead signals (assumed 500 Hz)
    :param header: header lines; lead names are parsed from the '.mat' lines
    :return: dict mapping lead name -> feature dict, or None when any lead is
        flat, has no R peaks, or delineation fails
    """
    fs = 500
    features = {}

    # Lead names come from the signal-description ('.mat') header lines.
    lead_names = [iline.split(' 0 ')[2].strip()
                  for iline in header if '.mat' in iline]

    for ecg_signal, lead in zip(ecg, lead_names):
        ecg_cleaned = nk.ecg_clean(ecg_signal, sampling_rate=fs)
        if np.all((ecg_cleaned == 0)):
            return None  # flat lead: nothing to analyse
        _, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=fs)
        if rpeaks['ECG_R_Peaks'].size == 0:
            return None  # no detectable beats
        try:
            _, waves_dwt = nk.ecg_delineate(
                ecg_cleaned,
                rpeaks['ECG_R_Peaks'],
                sampling_rate=fs,
                method="dwt")
            biphase, areas, t_till_peaks, ampls, dur, idxs, pq_intervals = p_peak_features(
                ecg_cleaned, waves_dwt)
            features[lead] = {
                'PQ_int': calculate_features(pq_intervals),
                'P_dur': calculate_features(dur),
                'Area/Dur_P': calculate_features(idxs),
                'Area_under_P': calculate_features(areas),
                'P_amp': calculate_features(ampls),
                'Time_till_P': calculate_features(t_till_peaks),
                'Biphase_P': calculate_features(biphase)
            }
        except IndexError:
            return None
    return features
def my_processing(ecg_signal):
    """
    Clean a 300 Hz ECG, detect R peaks (Hamilton) and delineate waves (CWT).

    :param ecg_signal: raw single-lead ECG sampled at 300 Hz
    :return: (cleaned signal, delineation signal, delineation waves, R-peak
        info); the delineation outputs are NaN when delineation fails
    """
    ecg_cleaned = nk.ecg_clean(ecg_signal, sampling_rate=300, method="biosppy")
    instant_peaks, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=300,
                                         method='hamilton2002')
    info = rpeaks
    try:
        # Additional info of the ecg signal
        delineate_signal, delineate_waves = nk.ecg_delineate(
            ecg_cleaned=ecg_cleaned, rpeaks=rpeaks, sampling_rate=300,
            method='cwt')
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Delineation failures still fall back to NaN.
        # (np.nan instead of np.NaN: the NaN alias was removed in NumPy 2.0.)
        delineate_signal = np.nan
        delineate_waves = np.nan
    return ecg_cleaned, delineate_signal, delineate_waves, info
def find_R_peaks(ecg_data, samplefreq):
    """
    Locate R-peak sample indices, retrying on a cleaned signal on failure.

    :param ecg_data: raw ECG samples
    :param samplefreq: sampling frequency in Hz
    :return: int array of R-peak indices; falls back to ``[0, 1, 2, 3]``
        when detection fails even after cleaning
    """
    def _detect(signal):
        # Run peak detection and drop NaN entries from the result.
        _, rpeaks = nk.ecg_peaks(signal, sampling_rate=samplefreq)
        r_peaks = rpeaks['ECG_R_Peaks']
        return np.delete(r_peaks, np.where(np.isnan(r_peaks))[0]).astype(int)

    # BUG FIX: the original bare `except:` clauses also caught SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    try:
        return _detect(ecg_data)
    except Exception:
        print("cleaning data")
        try:
            cleaned_ecg = nk.ecg_clean(ecg_data, sampling_rate=samplefreq,
                                       method="neurokit")
            return _detect(cleaned_ecg)
        except Exception:
            print("could not analyse cleaned ECG")
            # Temporary solution: placeholder peaks so callers don't crash.
            return np.array([0, 1, 2, 3])
def extract_features_tmaps(
    self,
    signal_tm: TensorMap,
    clean_method: str = "neurokit",
    r_method: str = "neurokit",
    wave_method: str = "dwt",
    min_peaks: int = 200,
):
    """
    Extract ECG features with the neurokit2 package: the P, Q, R, S and T
    peaks and the P, QRS and T wave onsets and offsets. Results are
    accumulated internally in ``self.r_peaks`` and ``self.waves_peaks``.

    :param signal_tm: <TensorMap> tensor map that yields the raw ECG signal
    :param clean_method: <str> The processing pipeline to apply. Can be one
        of 'neurokit' (default), 'biosppy', 'pantompkins1985',
        'hamilton2002', 'elgendi2010', 'engzeemod2012'.
    :param r_method: <str> The algorithm to be used for R-peak detection.
        Can be one of 'neurokit' (default), 'pantompkins1985',
        'hamilton2002', 'christov2004', 'gamboa2008', 'elgendi2010',
        'engzeemod2012' or 'kalidas2017'.
    :param wave_method: <str> Can be one of 'dwt' (default) for discrete
        wavelet transform or 'cwt' for continuous wavelet transform.
    :param min_peaks: <int> Minimum R peaks to be detected to proceed with
        further calculations.
    """
    # self.sampling_rate is iterated as (rate, start_index) pairs, one per
    # recording segment — presumably set elsewhere on the class; confirm.
    for i, _ in enumerate(self.sampling_rate):
        sampling_rate = self.sampling_rate[i][0]
        init = self.sampling_rate[i][1]
        if i == len(self.sampling_rate) - 1:
            # NOTE(review): slicing with end = -1 drops the final sample of
            # the last segment; `None` may have been intended — confirm.
            end = -1
        else:
            end = self.sampling_rate[i + 1][1]
        ecg_signal = signal_tm.tensor_from_file(signal_tm, self)[0][init:end]
        ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate, clean_method)
        try:
            _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate, r_method)
        except IndexError:
            # Peak detection can raise IndexError on degenerate segments.
            continue
        if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
            continue  # too few beats for reliable delineation
        # Delineate twice (default method plus wave_method) and merge the
        # two result dicts; wave_method results overwrite duplicates.
        _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks, sampling_rate)
        _, waves_peaks_2 = nk.ecg_delineate(
            ecg_signal,
            r_peaks,
            sampling_rate,
            wave_method,
        )
        waves_peaks.update(waves_peaks_2)
        # Append this segment's peaks to the running per-type collections.
        for peak_type in r_peaks:
            if peak_type not in self.r_peaks:
                self.r_peaks[peak_type] = r_peaks[peak_type]
            else:
                self.r_peaks[peak_type] = np.append(
                    self.r_peaks[peak_type],
                    r_peaks[peak_type],
                )
        for peak_type in waves_peaks:
            if peak_type not in self.waves_peaks:
                self.waves_peaks[peak_type] = waves_peaks[peak_type]
            else:
                self.waves_peaks[peak_type] = np.append(
                    self.waves_peaks[peak_type],
                    waves_peaks[peak_type],
                )
    # Normalise the accumulated arrays to plain Python lists.
    for peak_type in self.r_peaks:
        self.r_peaks[peak_type] = list(self.r_peaks[peak_type])
    for peak_type in self.waves_peaks:
        self.waves_peaks[peak_type] = list(self.waves_peaks[peak_type])
def extract_features(
    self,
    clean_method: str = "neurokit",
    r_method: str = "neurokit",
    wave_method: str = "dwt",
    min_peaks: int = 200,
    size: int = 200000,
):
    """
    Extract ECG features with the neurokit2 package: the P, Q, R, S and T
    peaks and the P, QRS and T wave onsets and offsets. The signal is
    processed in windows of at most ``size`` samples; results are
    accumulated internally in ``self.r_peaks`` and ``self.waves_peaks``.

    :param clean_method: <str> The processing pipeline to apply. Can be one
        of 'neurokit' (default), 'biosppy', 'pantompkins1985',
        'hamilton2002', 'elgendi2010', 'engzeemod2012'.
    :param r_method: <str> The algorithm to be used for R-peak detection.
        Can be one of 'neurokit' (default), 'pantompkins1985',
        'hamilton2002', 'christov2004', 'gamboa2008', 'elgendi2010',
        'engzeemod2012' or 'kalidas2017'.
    :param wave_method: <str> Can be one of 'dwt' (default) for discrete
        wavelet transform or 'cwt' for continuous wavelet transform.
    :param min_peaks: <int> Minimum R peaks to be detected to proceed with
        further calculations.
    :param size: <int> ECG sample size to analyze per loop.
    """
    if not self.lead:
        return
    # self.sampling_rate is iterated as (rate, start_index) pairs, one per
    # recording segment — presumably set elsewhere on the class; confirm.
    for i, _ in enumerate(self.sampling_rate):
        sampling_rate = self.sampling_rate[i][0]
        init = self.sampling_rate[i][1]
        if i == len(self.sampling_rate) - 1:
            # Last segment: its length is whatever remains of the signal.
            ecg_signal_size = (
                ECG_TMAPS[f"{self.lead}_value"].tensor_from_file(
                    ECG_TMAPS[f"{self.lead}_value"],
                    self,
                    visit=self.visit,
                )[0][init:].shape[0])
        else:
            ecg_signal_size = self.sampling_rate[i + 1][1] - init
        # First window: at most `size` samples.
        if size < ecg_signal_size:
            end = init + size
        else:
            end = init + ecg_signal_size
        # Slide the [init, end) window across the segment; the loop bound is
        # segment start + segment length.
        while init < ecg_signal_size + self.sampling_rate[i][1]:
            ecg_signal = ECG_TMAPS[f"{self.lead}_value"].tensor_from_file(
                ECG_TMAPS[f"{self.lead}_value"],
                self,
                visit=self.visit,
            )[0][init:end]
            ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate, clean_method)
            try:
                _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate, r_method)
            except IndexError:
                # Peak detection failed on this window: advance to the next
                # window (clamped to the segment end) and retry.
                init = end
                end = init + size
                if end > ecg_signal_size + self.sampling_rate[i][1]:
                    end = ecg_signal_size + self.sampling_rate[i][1]
                continue
            if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
                # Too few beats in this window: skip it and advance.
                init = end
                end = init + size
                if end > ecg_signal_size + self.sampling_rate[i][1]:
                    end = ecg_signal_size + self.sampling_rate[i][1]
                continue
            # Delineate twice (default method plus wave_method) and merge
            # the two result dicts; wave_method results overwrite duplicates.
            _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks,
                                              sampling_rate)
            _, waves_peaks_2 = nk.ecg_delineate(
                ecg_signal,
                r_peaks,
                sampling_rate,
                wave_method,
            )
            waves_peaks.update(waves_peaks_2)
            # Append this window's peaks to the running per-type collections.
            for peak_type in r_peaks:
                if peak_type not in self.r_peaks:
                    self.r_peaks[peak_type] = r_peaks[peak_type]
                else:
                    self.r_peaks[peak_type] = np.append(
                        self.r_peaks[peak_type],
                        r_peaks[peak_type],
                    )
            for peak_type in waves_peaks:
                if peak_type not in self.waves_peaks:
                    self.waves_peaks[peak_type] = waves_peaks[peak_type]
                else:
                    self.waves_peaks[peak_type] = np.append(
                        self.waves_peaks[peak_type],
                        waves_peaks[peak_type],
                    )
            # Advance to the next window (clamped to the segment end).
            init = end
            end = init + size
            if end > ecg_signal_size + self.sampling_rate[i][1]:
                end = ecg_signal_size + self.sampling_rate[i][1]
    # Normalise the accumulated arrays to plain Python lists.
    for peak_type in self.r_peaks:
        self.r_peaks[peak_type] = list(self.r_peaks[peak_type])
    for peak_type in self.waves_peaks:
        self.waves_peaks[peak_type] = list(self.waves_peaks[peak_type])
def create_df(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Build the ECG feature matrix: one row of scalar features per recording.

    Rows of ``dataframe`` are ECG signals of varying length, right-padded
    with NaN. For each signal this extracts peak amplitude/interval
    statistics, signal quality, HRV indices, LF/HF frequency features and a
    'typical' heartbeat template (plus its db3 wavelet decomposition).

    :param dataframe: wide DataFrame, one padded signal per row
    :return: DataFrame of features, one row per input signal
    """
    # --- measure the true (unpadded) length of every signal ---------------
    lengths = []
    width = dataframe.shape[1]
    for row in dataframe.index.tolist():
        temp_width = width
        # Walk backwards over the NaN padding; stop at the first real sample.
        for item in dataframe.loc[row][::-1]:
            if not pd.isna(item) and isinstance(item, float):
                temp_width -= 1
                break
            temp_width -= 1
        lengths.append(temp_width)

    """ README
    For the following features we measured:
        [mean, median, 5 % percentile, 95 % percentile, standard deviation]
    R-peak location were retrieved by nk.ecg_peaks
    Q-peak and S-location were retrieved by nk.ecg_delineate
    ?_ampl_*     ?-Peak amplitude
    ?_nr_peaks   number of ?-Peaks
    ?_diff_*     Interval between ?-Peaks
    QRS_diff_*   QRS duration
    len_*        length of signal
    Qual_*       quality of signal measured with nk.ecg_quality
    sign_*       signal
    Also the output from nk.hrv_time which contains different measurements
    for the heart rate variation (HRV*) was added.
    Additionally one 'typical' heartbeat was created (all length 180):
    MN_*   mean signal
    MD_*   median signal
    P5_*   5 % percentile signal
    P95_*  95 % percentile signal
    SD_*   standard deviation of signal
    """
    names = ['R_ampl_mean', 'R_ampl_median', 'R_ampl_perc5', 'R_ampl_perc95',
             'R_ampl_sd', 'R_nr_peaks',
             'len_mean', 'len_median', 'len_perc5', 'len_perc95', 'len_sd',
             'sign_mean', 'sign_median', 'sign_perc5', 'sign_perc95', 'sign_sd',
             'Qual_mean', 'Qual_median', 'Qual_perc5', 'Qual_perc95', 'Qual_sd',
             'Q_ampl_mean', 'Q_ampl_median', 'Q_ampl_perc5', 'Q_ampl_perc95',
             'Q_ampl_sd', 'Q_nr_peaks',
             'Q_diff_mean', 'Q_diff_median', 'Q_diff_perc5', 'Q_diff_perc95',
             'Q_diff_sd',
             'S_ampl_mean', 'S_ampl_median', 'S_ampl_perc5', 'S_ampl_perc95',
             'S_ampl_sd', 'S_nr_peaks',
             'S_diff_mean', 'S_diff_median', 'S_diff_perc5', 'S_diff_perc95',
             'S_diff_sd',
             'P_ampl_mean', 'P_ampl_median', 'P_ampl_perc5', 'P_ampl_perc95',
             'P_ampl_sd', 'P_nr_peaks',
             'T_ampl_mean', 'T_ampl_median', 'T_ampl_perc5', 'T_ampl_perc95',
             'T_ampl_sd', 'T_nr_peaks',
             'QRS_diff_mean', 'QRS_diff_median', 'QRS_diff_perc5',
             'QRS_diff_perc95', 'QRS_diff_sd',
             'PR_diff_mean', 'PR_diff_median', 'PR_diff_perc5',
             'PR_diff_perc95', 'PR_diff_sd',
             'RT_diff_mean', 'RT_diff_median', 'RT_diff_perc5',
             'RT_diff_perc95', 'RT_diff_sd',
             'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN',
             'HRV_CVSD', 'HRV_MedianNN', 'HRV_MadNN', 'HRV_MCVNN',
             'HRV_IQRNN', 'HRV_pNN50', 'HRV_pNN20', 'HRV_TINN', 'HRV_HTI',
             'HRV_ULF', 'HRV_VLF', 'HRV_LF', 'HRV_HF', 'HRV_VHF', 'HRV_LFHF',
             'HRV_LFn', 'HRV_HFn', 'HRV_LnHF',
             'HRV_SD1', 'HRV_SD2', 'HRV_SD1SD2', 'HRV_S', 'HRV_CSI',
             'HRV_CVI', 'HRV_CSI_Modified', 'HRV_PIP', 'HRV_IALS', 'HRV_PSS',
             'HRV_PAS', 'HRV_GI', 'HRV_SI', 'HRV_AI', 'HRV_PI', 'HRV_C1d',
             'HRV_C1a', 'HRV_SD1d', 'HRV_SD1a', 'HRV_C2d', 'HRV_C2a',
             'HRV_SD2d', 'HRV_SD2a', 'HRV_Cd', 'HRV_Ca', 'HRV_SDNNd',
             'HRV_SDNNa', 'HRV_ApEn', 'HRV_SampEn',
             'J_LF', 'J_HF', 'J_L/H']

    # 'Typical' heartbeat template: 180 samples plus its db3 DWT coefficients.
    template_len = 180
    mean_names = ['MN_' + str(index) for index in range(template_len)]
    median_names = ['MD_' + str(index) for index in range(template_len)]
    perc5_names = ['P5_' + str(index) for index in range(template_len)]
    perc95_names = ['P95_' + str(index) for index in range(template_len)]
    sd_names = ['SD_' + str(index) for index in range(template_len)]
    wavelet = 'db3'
    # pywt.dwt output length for mode with full extension.
    wl_len = int(np.floor((template_len + pywt.Wavelet(wavelet).dec_len - 1) / 2))
    wl_mean_names = ['WLMN_' + str(index) for index in range(2 * wl_len)]
    wl_median_names = ['WLMD_' + str(index) for index in range(2 * wl_len)]
    wl_perc5_names = ['WLP5_' + str(index) for index in range(2 * wl_len)]
    wl_perc95_names = ['WLP95_' + str(index) for index in range(2 * wl_len)]
    wl_sd_names = ['WLSD_' + str(index) for index in range(2 * wl_len)]
    typical_signal_names = (mean_names + median_names + perc5_names
                            + perc95_names + sd_names + wl_mean_names
                            + wl_median_names + wl_perc5_names
                            + wl_perc95_names + wl_sd_names)
    names += typical_signal_names

    data = np.empty([dataframe.shape[0], len(names)])
    iteration = 0
    for row_index, row in dataframe.iterrows():
        print(row_index)
        # Retrieve the unpadded ECG data and clean it.
        ecg_signal = row[:lengths[iteration] + 1]
        ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate=SAMPLING_RATE)

        # Find R-peaks
        peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=SAMPLING_RATE)
        R_amplitudes = ecg_signal[info['ECG_R_Peaks']]

        # Check if the signal is flipped: if |Q| amplitudes dominate |R| for
        # most beats, invert the signal and redo detection/delineation.
        # NOTE(review): the hard-coded sampling_rate=300 below presumably
        # equals SAMPLING_RATE — confirm.
        if len(R_amplitudes) > 4:
            _, waves_peak = nk.ecg_delineate(ecg_signal, info,
                                             sampling_rate=300,
                                             show=False)
            # NaN Q peaks map to -inf so they never dominate an R peak.
            Q_amplitudes = [ecg_signal[peak_index]
                            if str(peak_index) != 'nan' else -np.inf
                            for peak_index in waves_peak['ECG_Q_Peaks']]
            if np.sum([1 if np.abs(rpeak) > np.abs(Q_amplitudes[index]) else -1
                       for index, rpeak in enumerate(R_amplitudes)]) < 0:
                print("flip", row_index)
                ecg_signal = -ecg_signal
                peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=300)
                R_amplitudes = ecg_signal[info['ECG_R_Peaks']]
                if len(R_amplitudes) > 4:
                    _, waves_peak = nk.ecg_delineate(ecg_signal, info,
                                                     sampling_rate=300,
                                                     show=False)

        data_temp = []
        # R amplitude statistics
        if len(R_amplitudes) > 0:
            data_temp = [np.mean(R_amplitudes),
                         np.median(R_amplitudes),
                         np.percentile(R_amplitudes, q=5),
                         np.percentile(R_amplitudes, q=95),
                         np.std(R_amplitudes),
                         len(R_amplitudes)]
        else:
            empty = np.empty([6])
            empty[:] = np.nan
            data_temp += empty.tolist()

        # length of signal (scalar, so all five stats collapse to the value)
        data_new = [np.mean(lengths[iteration] / SAMPLING_RATE),
                    np.median(lengths[iteration] / SAMPLING_RATE),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=5),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=95),
                    np.std(lengths[iteration] / SAMPLING_RATE)]
        data_temp += data_new

        # raw signal statistics
        data_new = [np.mean(ecg_signal),
                    np.median(ecg_signal),
                    np.percentile(ecg_signal, q=5),
                    np.percentile(ecg_signal, q=95),
                    np.std(ecg_signal)]
        data_temp += data_new

        # Check if we have enough peaks to retrieve more information
        if len(R_amplitudes) > 4:
            # Signal quality
            quality = nk.ecg_quality(ecg_signal, sampling_rate=SAMPLING_RATE)
            data_new = [np.mean(quality),
                        np.median(quality),
                        np.percentile(quality, q=5),
                        np.percentile(quality, q=95),
                        np.std(quality)]
            data_temp += data_new

            # Q amplitude (delineation results reused from the flip check);
            # remove nan values first.
            Q_peaks = [peak for peak in waves_peak['ECG_Q_Peaks']
                       if str(peak) != 'nan']
            if len(Q_peaks) > 0:
                Q_amplitudes = ecg_signal[Q_peaks]
                data_new = [np.mean(Q_amplitudes),
                            np.median(Q_amplitudes),
                            np.percentile(Q_amplitudes, q=5),
                            np.percentile(Q_amplitudes, q=95),
                            np.std(Q_amplitudes),
                            len(Q_amplitudes)]
                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.nan
                empty[5] = 0
                data_temp += empty.tolist()
            # more than 1 Q-Peak => can build interval[s]
            if len(Q_peaks) > 1:
                Q_peaks_diff = [(Q_peaks[index + 1] - Q_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(Q_peaks[:len(Q_peaks) - 1])]
                # QQ interval
                data_new = [np.mean(Q_peaks_diff),
                            np.median(Q_peaks_diff),
                            np.percentile(Q_peaks_diff, q=5),
                            np.percentile(Q_peaks_diff, q=95),
                            np.std(Q_peaks_diff)]
                data_temp += data_new
            else:
                # 0 or 1 Q-peak = no interval => NaN
                empty = np.empty([5])
                empty[:] = np.nan
                data_temp += empty.tolist()

            # S amplitude; remove nan values first.
            S_peaks = [peak for peak in waves_peak['ECG_S_Peaks']
                       if str(peak) != 'nan']
            if len(S_peaks) > 0:
                S_amplitudes = ecg_signal[S_peaks]
                data_new = [np.mean(S_amplitudes),
                            np.median(S_amplitudes),
                            np.percentile(S_amplitudes, q=5),
                            np.percentile(S_amplitudes, q=95),
                            np.std(S_amplitudes),
                            len(S_amplitudes)]
                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.nan
                empty[5] = 0
                data_temp += empty.tolist()
            # more than one S-peak => SS intervals
            if len(S_peaks) > 1:
                S_peaks_diff = [(S_peaks[index + 1] - S_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(S_peaks[:len(S_peaks) - 1])]
                data_new = [np.mean(S_peaks_diff),
                            np.median(S_peaks_diff),
                            np.percentile(S_peaks_diff, q=5),
                            np.percentile(S_peaks_diff, q=95),
                            np.std(S_peaks_diff)]
                data_temp += data_new
            else:
                # 0 or 1 S-peak = no interval => NaN
                empty = np.empty([5])
                empty[:] = np.nan
                data_temp += empty.tolist()

            # P amplitude
            P_peaks = [peak for peak in waves_peak['ECG_P_Peaks']
                       if str(peak) != 'nan']
            if len(P_peaks) > 0:
                P_amplitudes = ecg_signal[P_peaks]
                data_new = [np.mean(P_amplitudes),
                            np.median(P_amplitudes),
                            np.percentile(P_amplitudes, q=5),
                            np.percentile(P_amplitudes, q=95),
                            np.std(P_amplitudes),
                            len(P_amplitudes)]
                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.nan
                empty[5] = 0
                data_temp += empty.tolist()

            # T amplitude
            T_peaks = [peak for peak in waves_peak['ECG_T_Peaks']
                       if str(peak) != 'nan']
            if len(T_peaks) > 0:
                T_peaks = ecg_signal[T_peaks]
                data_new = [np.mean(T_peaks),
                            np.median(T_peaks),
                            np.percentile(T_peaks, q=5),
                            np.percentile(T_peaks, q=95),
                            np.std(T_peaks),
                            len(T_peaks)]
                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.nan
                empty[5] = 0
                data_temp += empty.tolist()

            # QRS interval: difference between Q and S peak per beat.
            QRS_peaks_diff = []
            for index in range(len(waves_peak['ECG_Q_Peaks'])):
                if not (np.isnan(waves_peak['ECG_Q_Peaks'][index])
                        or np.isnan(waves_peak['ECG_S_Peaks'][index])):
                    QRS_peaks_diff.append(
                        (waves_peak['ECG_S_Peaks'][index]
                         - waves_peak['ECG_Q_Peaks'][index]) / SAMPLING_RATE)
            if len(QRS_peaks_diff) > 0:
                data_new = [np.mean(QRS_peaks_diff),
                            np.median(QRS_peaks_diff),
                            np.percentile(QRS_peaks_diff, q=5),
                            np.percentile(QRS_peaks_diff, q=95),
                            np.std(QRS_peaks_diff)]
                data_temp += data_new
            else:
                empty = np.empty([5])
                empty[:] = np.nan
                data_temp += empty.tolist()

            # PR interval: difference between P and R peak per beat.
            PR_peaks_diff = []
            for index in range(len(waves_peak['ECG_P_Peaks'])):
                if not np.isnan(waves_peak['ECG_P_Peaks'][index]):
                    PR_peaks_diff.append(
                        (info['ECG_R_Peaks'][index]
                         - waves_peak['ECG_P_Peaks'][index]) / SAMPLING_RATE)
            if len(PR_peaks_diff) > 0:
                data_new = [np.mean(PR_peaks_diff),
                            np.median(PR_peaks_diff),
                            np.percentile(PR_peaks_diff, q=5),
                            np.percentile(PR_peaks_diff, q=95),
                            np.std(PR_peaks_diff)]
                data_temp += data_new
            else:
                empty = np.empty([5])
                empty[:] = np.nan
                data_temp += empty.tolist()

            # RT interval: difference between R and T peak per beat.
            RT_peaks_diff = []
            for index in range(len(waves_peak['ECG_T_Peaks'])):
                if not np.isnan(waves_peak['ECG_T_Peaks'][index]):
                    RT_peaks_diff.append(
                        (waves_peak['ECG_T_Peaks'][index]
                         - info['ECG_R_Peaks'][index]) / SAMPLING_RATE)
            if len(RT_peaks_diff) > 0:
                # BUG FIX: the RT median was previously computed from
                # PR_peaks_diff instead of RT_peaks_diff.
                data_new = [np.mean(RT_peaks_diff),
                            np.median(RT_peaks_diff),
                            np.percentile(RT_peaks_diff, q=5),
                            np.percentile(RT_peaks_diff, q=95),
                            np.std(RT_peaks_diff)]
                data_temp += data_new
            else:
                empty = np.empty([5])
                empty[:] = np.nan
                data_temp += empty.tolist()

            # HRV indices (time, frequency and nonlinear domains), see
            # https://neurokit2.readthedocs.io/en/latest/functions.html?highlight=hrv%20time#neurokit2.hrv.hrv_time
            hrv_time = nk.hrv(peaks, sampling_rate=SAMPLING_RATE, show=False)
            data_new = hrv_time.values.tolist()[0]
            data_temp += data_new

            # LF/HF power from a cubic interpolation of the RR series, after
            # http://www.paulvangent.com/2016/03/21/analyzing-a-discrete-heart-rate-signal-using-python-part-2/
            rpeaks = info['ECG_R_Peaks']
            r_interval = [rpeaks[index + 1] - rpeaks[index]
                          for index in range(len(rpeaks) - 1)]
            RR_x_new = np.linspace(rpeaks[0], rpeaks[-2], rpeaks[-2])
            f = interp1d(rpeaks[:-1], r_interval, kind='cubic')
            n = lengths[iteration] + 1  # Length of the signal
            frq = np.fft.fftfreq(n, d=(1 / SAMPLING_RATE))
            frq = frq[range(int(n / 2))]  # single-sided frequency range
            Y = np.fft.fft(f(RR_x_new)) / n  # Calculate FFT
            try:
                Y = Y[range(int(n / 2))]
                # LF band 0.04-0.15 Hz, HF band 0.16-0.5 Hz.
                lf = np.trapz(abs(Y[(frq >= 0.04) & (frq <= 0.15)]))
                hf = np.trapz(abs(Y[(frq >= 0.16) & (frq <= 0.5)]))
                data_new = [lf, hf, lf / hf]
                data_temp += data_new
            except IndexError as err:
                print(err)
                data_temp += [None, None, None]
        # if we don't have enough R peaks return vector of nan's
        else:
            # NaN for everything except the 16 scalar stats already added
            # and the template features added below.
            empty = np.empty([len(names) - 16 - len(typical_signal_names)])
            empty[:] = np.nan
            data_temp += empty.tolist()

        # Create a 'typical' heartbeat from the biosppy beat templates.
        out = ecg.ecg(signal=ecg_signal, sampling_rate=SAMPLING_RATE, show=False)
        mean = np.mean(out['templates'], axis=0)
        median = np.median(out['templates'], axis=0)
        perc5 = np.percentile(out['templates'].astype(np.float64), axis=0, q=5)
        perc95 = np.percentile(out['templates'].astype(np.float64), axis=0, q=95)
        std = np.std(out['templates'].astype(np.float64), axis=0)
        data_new = np.concatenate((mean, median, perc5, perc95, std)).tolist()
        data_temp += data_new

        # db3 wavelet decomposition of each template statistic.
        (wl_mean_cA, wl_mean_cD) = pywt.dwt(
            np.mean(out['templates'], axis=0), 'db3', 'periodic')
        (wl_median_cA, wl_median_cD) = pywt.dwt(
            np.median(out['templates'], axis=0), 'db3', 'periodic')
        (wl_perc5_cA, wl_perc5_cD) = pywt.dwt(
            np.percentile(out['templates'].astype(np.float64), axis=0, q=5),
            'db3', 'periodic')
        (wl_perc95_cA, wl_perc95_cD) = pywt.dwt(
            np.percentile(out['templates'].astype(np.float64), axis=0, q=95),
            'db3', 'periodic')
        (wl_sd_cA, wl_sd_cD) = pywt.dwt(
            np.std(out['templates'].astype(np.float64), axis=0),
            'db3', 'periodic')
        data_new = np.concatenate((wl_mean_cA, wl_mean_cD,
                                   wl_median_cA, wl_median_cD,
                                   wl_perc5_cA, wl_perc5_cD,
                                   wl_perc95_cA, wl_perc95_cD,
                                   wl_sd_cA, wl_sd_cD)).tolist()
        data_temp += data_new

        data[iteration] = data_temp
        iteration += 1

    features = pd.DataFrame(data, columns=names)
    return features
def compute_features(data, condition, sampling_rate=700, window_size=60, window_shift=0.25):
    """Compute windowed physiological features from multi-channel chest data.

    Slides a window of ``window_size`` seconds (stepped by ``window_shift``
    seconds) over the samples selected by ``condition`` and derives
    statistical, HRV, EDA/SCR, EMG, respiration and temperature features
    per window.

    :param data: mapping of channel name ('ECG', 'EDA', 'EMG', 'Resp',
        'Temp', 'ACC') to sample arrays, each indexable by ``condition``.
        NOTE(review): assumed to be a WESAD-style chest-sensor dict — confirm.
    :param condition: boolean mask / index array selecting one condition.
    :param sampling_rate: sampling frequency of all channels in Hz.
    :param window_size: window length in seconds.
    :param window_shift: shift between consecutive windows in seconds.
    :return: ``(chest_df, chest_df_5)`` — one feature row per window, and the
        5-second-window frame (currently never populated, returned empty).
    """
    init = time.time()

    # ---- Preprocessing over the whole condition segment ----
    # ECG
    ecg_cleaned = nk.ecg_clean(data["ECG"][condition].flatten(), sampling_rate=sampling_rate)

    # EDA: 5 Hz lowpass filter, then standardize
    eda_highcut = 5
    eda_filtered = nk.signal_filter(data['EDA'][condition].flatten(),
                                    sampling_rate=sampling_rate, highcut=eda_highcut)
    eda_cleaned = nk.standardize(eda_filtered)
    # TODO: not sure about the approach. cvxeda takes longer periods
    eda_phasic_tonic = nk.eda_phasic(eda_cleaned, sampling_rate=sampling_rate)
    # Time axis (seconds) used for the SCL-vs-time correlation feature below.
    eda_phasic_tonic['t'] = [(1 / sampling_rate) * i for i in range(eda_phasic_tonic.shape[0])]
    eda_scr_peaks, scr_info = nk.eda_peaks(eda_phasic_tonic['EDA_Phasic'],
                                           sampling_rate=sampling_rate)

    # EMG for the 5-sec-window features: 50 Hz highpass to remove DC bias.
    # More on DC bias: https://www.c-motion.com/v3dwiki/index.php/EMG:_Removing_DC_Bias
    emg_lowcut = 50
    emg_filtered_dc = nk.signal_filter(data['EMG'][condition].flatten(),
                                       sampling_rate=sampling_rate, lowcut=emg_lowcut)
    # EMG for the 60-sec-window features: 50 Hz lowpass.
    emg_highcut = 50
    emg_filtered = nk.signal_filter(data['EMG'][condition].flatten(),
                                    sampling_rate=sampling_rate, highcut=emg_highcut)

    # Resp: the 'biosppy' method applies the important 0.1-0.35 Hz bandpass.
    resp_processed, _ = nk.rsp_process(data['Resp'][condition].flatten(),
                                       sampling_rate=sampling_rate, method='biosppy')

    print('Elapsed Preprocess', str(timedelta(seconds=time.time() - init)))
    init = time.time()

    chest_df_5 = pd.DataFrame()  # for 5-sec windows (currently never filled)
    chest_df = pd.DataFrame()
    window = int(sampling_rate * window_size)
    for i in range(0, data['ACC'][condition].shape[0] - window, int(sampling_rate * window_shift)):
        # ---- ACC ----
        w_acc_data = data['ACC'][condition][i: window + i]
        acc_x_mean, acc_y_mean, acc_z_mean = np.mean(w_acc_data, axis=0)  # Feature
        acc_x_std, acc_y_std, acc_z_std = np.std(w_acc_data, axis=0)  # Feature
        acc_x_peak, acc_y_peak, acc_z_peak = np.amax(w_acc_data, axis=0)  # Feature
        acc_x_absint, acc_y_absint, acc_z_absint = np.abs(np.trapz(w_acc_data, axis=0))  # Feature
        xyz = np.sum(w_acc_data, axis=0)
        xyz_mean = np.mean(xyz)  # Feature
        xyz_std = np.std(xyz)  # Feature
        xyz_absint = np.abs(np.trapz(xyz))  # Feature

        # ---- ECG / HRV ----
        w_ecg_cleaned = ecg_cleaned[i: window + i]
        _, ecg_info = nk.ecg_peaks(w_ecg_cleaned, sampling_rate=sampling_rate)
        w_ecg_rpeaks = ecg_info['ECG_R_Peaks']
        ecg_nni = pyhrv.tools.nn_intervals(w_ecg_rpeaks)
        # HR
        rs_hr = pyhrv.time_domain.hr_parameters(ecg_nni)
        hr_mean = rs_hr['hr_mean']  # Feature
        hr_std = rs_hr['hr_std']  # Feature
        # HRV time-domain
        rs_hrv = pyhrv.time_domain.nni_parameters(ecg_nni)
        hrv_mean = rs_hrv['nni_mean']  # NOTE(review): computed but not stored in the output frame
        hrv_std = pyhrv.time_domain.sdnn(ecg_nni)['sdnn']  # NOTE(review): also not stored
        rs_nn50 = pyhrv.time_domain.nn50(ecg_nni)
        hrv_NN50 = rs_nn50['nn50']  # Feature
        hrv_pNN50 = rs_nn50['pnn50']  # Feature
        hrv_time = nk.hrv_time(w_ecg_rpeaks, sampling_rate=sampling_rate, show=False)
        hrv_TINN = hrv_time.loc[0, 'HRV_TINN']  # Feature
        hrv_rms = pyhrv.time_domain.rmssd(ecg_nni)['rmssd']  # Feature
        # HRV frequency-domain (Welch PSD, custom bands)
        hrv_freq = pyhrv.frequency_domain.welch_psd(
            ecg_nni,
            fbands={'ulf': (0.01, 0.04), 'vlf': (0.04, 0.15),
                    'lf': (0.15, 0.4), 'hf': (0.4, 1)},
            mode='dev')
        hrv_freq = hrv_freq[0]
        hrv_ULF = hrv_freq['fft_abs'][0]  # Feature
        hrv_LF = hrv_freq['fft_abs'][1]  # Feature
        hrv_HF = hrv_freq['fft_abs'][2]  # Feature
        hrv_VHF = hrv_freq['fft_abs'][3]  # Feature
        hrv_lf_hf_ratio = hrv_freq['fft_ratio']  # Feature
        hrv_f_sum = hrv_freq['fft_total']  # Feature
        hrv_rel_ULF = hrv_freq['fft_rel'][0]  # Feature
        hrv_rel_LF = hrv_freq['fft_rel'][1]  # Feature
        hrv_rel_HF = hrv_freq['fft_rel'][2]  # Feature
        hrv_rel_VHF = hrv_freq['fft_rel'][3]  # Feature
        hrv_LFn = hrv_freq['fft_norm'][0]  # Feature
        hrv_HFn = hrv_freq['fft_norm'][1]  # Feature

        # ---- EDA ----
        w_eda_data = eda_cleaned[i: window + i]
        w_eda_phasic_tonic = eda_phasic_tonic[i: window + i]
        eda_mean = np.mean(w_eda_data)  # Feature
        eda_std = np.std(w_eda_data)  # Feature
        eda_min = np.amin(w_eda_data)  # Feature
        eda_max = np.amax(w_eda_data)  # Feature
        eda_slope = get_slope(w_eda_data)  # Feature
        # dynamic range: https://en.wikipedia.org/wiki/Dynamic_range
        eda_drange = eda_max / eda_min  # Feature
        eda_scl_mean = np.mean(w_eda_phasic_tonic['EDA_Tonic'])  # Feature
        eda_scl_std = np.std(w_eda_phasic_tonic['EDA_Tonic'])  # Feature
        eda_scr_mean = np.mean(w_eda_phasic_tonic['EDA_Phasic'])  # Feature
        eda_scr_std = np.std(w_eda_phasic_tonic['EDA_Phasic'])  # Feature
        eda_corr_scl_t = nk.cor(w_eda_phasic_tonic['EDA_Tonic'],
                                w_eda_phasic_tonic['t'], show=False)  # Feature
        eda_scr_no = eda_scr_peaks['SCR_Peaks'][i: window + i].sum()  # Feature
        # Sum of (non-NaN) SCR amplitudes in the window
        ampl = scr_info['SCR_Amplitude'][i: window + i]
        eda_ampl_sum = np.sum(ampl[~np.isnan(ampl)])  # Feature
        # SCR area approximated as 0.5 * height * width per detected peak
        scr_peaks, scr_properties = scisig.find_peaks(w_eda_phasic_tonic['EDA_Phasic'], height=0)
        width_scr = scisig.peak_widths(w_eda_phasic_tonic['EDA_Phasic'], scr_peaks, rel_height=0)
        ht_scr = scr_properties['peak_heights']
        eda_scr_area = 0.5 * np.matmul(ht_scr, width_scr[1])  # Feature

        # ---- EMG (5-sec style features on the DC-free signal) ----
        w_emg_data = emg_filtered_dc[i: window + i]
        emg_mean = np.mean(w_emg_data)  # Feature
        emg_std = np.std(w_emg_data)  # Feature
        emg_min = np.amin(w_emg_data)
        emg_max = np.amax(w_emg_data)
        emg_drange = emg_max / emg_min  # Feature
        emg_absint = np.abs(np.trapz(w_emg_data))  # Feature
        emg_median = np.median(w_emg_data)  # Feature
        emg_perc_10 = np.percentile(w_emg_data, 10)  # Feature
        emg_perc_90 = np.percentile(w_emg_data, 90)  # Feature
        emg_peak_freq, emg_mean_freq, emg_median_freq = get_freq_features(w_emg_data)  # Features
        # ---- EMG (60-sec features on the lowpass signal) ----
        peaks, properties = scisig.find_peaks(emg_filtered[i: window + i], height=0)
        emg_peak_no = peaks.shape[0]  # Feature
        emg_peak_amp_mean = np.mean(properties['peak_heights'])  # Feature
        emg_peak_amp_std = np.std(properties['peak_heights'])  # Feature
        emg_peak_amp_sum = np.sum(properties['peak_heights'])  # Feature
        emg_peak_amp_max = np.abs(np.amax(properties['peak_heights']))
        # https://www.researchgate.net/post/How_Period_Normalization_and_Amplitude_normalization_are_performed_in_ECG_Signal
        emg_peak_amp_norm_sum = np.sum(properties['peak_heights'] / emg_peak_amp_max)  # Feature

        # ---- Resp ----
        w_resp_data = resp_processed[i: window + i]
        # Inhalation/exhalation duration analysis: count runs of consecutive
        # samples per RSP_Phase value (1 = inhalation, 0 = exhalation).
        idx = np.nan
        count = 0
        duration = dict()
        first = True
        for j in w_resp_data[~w_resp_data['RSP_Phase'].isnull()]['RSP_Phase'].to_numpy():
            if j != idx:
                if first:
                    idx = int(j)
                    duration[1] = []
                    duration[0] = []
                    first = False
                    continue
                duration[idx].append(count)
                idx = int(j)
                count = 0
            count += 1
        # NOTE(review): a window with no valid RSP_Phase samples leaves
        # `duration` empty and raises KeyError below — confirm upstream
        # guarantees at least one phase sample per window.
        resp_inhal_mean = np.mean(duration[1])  # Feature
        resp_inhal_std = np.std(duration[1])  # Feature
        resp_exhal_mean = np.mean(duration[0])  # Feature
        resp_exhal_std = np.std(duration[0])  # Feature
        resp_inhal_duration = w_resp_data['RSP_Phase'][w_resp_data['RSP_Phase'] == 1].count()
        resp_exhal_duration = w_resp_data['RSP_Phase'][w_resp_data['RSP_Phase'] == 0].count()
        resp_ie_ratio = resp_inhal_duration / resp_exhal_duration  # Feature
        resp_duration = resp_inhal_duration + resp_exhal_duration  # Feature
        resp_stretch = w_resp_data['RSP_Amplitude'].max() - w_resp_data['RSP_Amplitude'].min()  # Feature
        resp_breath_rate = len(duration[1])  # Feature
        # Volume: area under the curve of the inspiration phase of a cycle
        resp_peaks, resp_properties = scisig.find_peaks(w_resp_data['RSP_Clean'], height=0)
        resp_width = scisig.peak_widths(w_resp_data['RSP_Clean'], resp_peaks, rel_height=0)
        resp_ht = resp_properties['peak_heights']
        resp_volume = 0.5 * np.matmul(resp_ht, resp_width[1])  # Feature

        # ---- Temp ----
        w_temp_data = data['Temp'][condition][i: window + i].flatten()
        temp_mean = np.mean(w_temp_data)  # Feature
        temp_std = np.std(w_temp_data)  # Feature
        temp_min = np.amin(w_temp_data)  # Feature
        temp_max = np.amax(w_temp_data)  # Feature
        temp_drange = temp_max / temp_min  # Feature
        temp_slope = get_slope(w_temp_data.ravel())  # Feature

        chest_df = chest_df.append({
            # NOTE(review): 'ACC_xzy_mean' key kept verbatim (typo for xyz)
            # because downstream consumers may rely on the column name.
            'ACC_x_mean': acc_x_mean, 'ACC_y_mean': acc_y_mean, 'ACC_z_mean': acc_z_mean, 'ACC_xzy_mean': xyz_mean,
            'ACC_x_std': acc_x_std, 'ACC_y_std': acc_y_std, 'ACC_z_std': acc_z_std, 'ACC_xyz_std': xyz_std,
            'ACC_x_absint': acc_x_absint, 'ACC_y_absint': acc_y_absint, 'ACC_z_absint': acc_z_absint,
            'ACC_xyz_absint': xyz_absint,
            'ACC_x_peak': acc_x_peak, 'ACC_y_peak': acc_y_peak, 'ACC_z_peak': acc_z_peak,
            'ECG_hr_mean': hr_mean, 'ECG_hr_std': hr_std,
            'ECG_hrv_NN50': hrv_NN50, 'ECG_hrv_pNN50': hrv_pNN50,
            'ECG_hrv_TINN': hrv_TINN, 'ECG_hrv_RMS': hrv_rms,
            'ECG_hrv_ULF': hrv_ULF, 'ECG_hrv_LF': hrv_LF, 'ECG_hrv_HF': hrv_HF, 'ECG_hrv_VHF': hrv_VHF,
            'ECG_hrv_LFHF_ratio': hrv_lf_hf_ratio, 'ECG_hrv_f_sum': hrv_f_sum,
            'ECG_hrv_rel_ULF': hrv_rel_ULF, 'ECG_hrv_rel_LF': hrv_rel_LF,
            'ECG_hrv_rel_HF': hrv_rel_HF, 'ECG_hrv_rel_VHF': hrv_rel_VHF,
            'ECG_hrv_LFn': hrv_LFn, 'ECG_hrv_HFn': hrv_HFn,
            # BUGFIX: duplicate 'EDA_mean' entry removed.
            'EDA_mean': eda_mean, 'EDA_std': eda_std,
            'EDA_min': eda_min, 'EDA_max': eda_max,
            'EDA_slope': eda_slope, 'EDA_drange': eda_drange,
            'EDA_SCL_mean': eda_scl_mean,
            # BUGFIX: previously stored eda_scl_mean under the std key.
            'EDA_SCL_std': eda_scl_std,
            'EDA_SCR_mean': eda_scr_mean, 'EDA_SCR_std': eda_scr_std,
            'EDA_corr_SCL_t': eda_corr_scl_t, 'EDA_SCR_no': eda_scr_no,
            'EDA_ampl_sum': eda_ampl_sum, 'EDA_scr_area': eda_scr_area,
            'EMG_mean': emg_mean, 'EMG_std': emg_std, 'EMG_drange': emg_drange,
            'EMG_absint': emg_absint, 'EMG_median': emg_median,
            'EMG_perc_10': emg_perc_10, 'EMG_perc_90': emg_perc_90,
            'EMG_peak_freq': emg_peak_freq, 'EMG_mean_freq': emg_mean_freq,
            'EMG_median_freq': emg_median_freq,
            'EMG_peak_no': emg_peak_no, 'EMG_peak_amp_mean': emg_peak_amp_mean,
            'EMG_peak_amp_std': emg_peak_amp_std, 'EMG_peak_amp_sum': emg_peak_amp_sum,
            'EMG_peak_amp_norm_sum': emg_peak_amp_norm_sum,
            'RESP_inhal_mean': resp_inhal_mean, 'RESP_inhal_std': resp_inhal_std,
            'RESP_exhal_mean': resp_exhal_mean, 'RESP_exhal_std': resp_exhal_std,
            'RESP_ie_ratio': resp_ie_ratio, 'RESP_duration': resp_duration,
            'RESP_stretch': resp_stretch, 'RESP_breath_rate': resp_breath_rate,
            'RESP_volume': resp_volume,
            'TEMP_mean': temp_mean, 'TEMP_std': temp_std, 'TEMP_min': temp_min,
            'TEMP_max': temp_max, 'TEMP_drange': temp_drange, 'TEMP_slope': temp_slope
        }, ignore_index=True)

    print('Elapsed Process', condition.shape[0], str(timedelta(seconds=time.time() - init)))
    return chest_df, chest_df_5
def corr_and_featurize_ecg(self, recording, sample_freq, r_peaks, s_peaks,
                           q_peaks, p_peaks, t_peaks):
    """Derive interval and amplitude features from an ECG recording.

    When fewer than 3 peaks of any type are supplied, the peaks are
    re-detected with ``nk.ecg_process``; otherwise the supplied peaks are
    refined with ``processing.peaks.correct_peaks``. Which features are
    emitted is controlled by the boolean ``self.*`` flags (``rpeak_int``,
    ``rpeak_amp``, ...); when detection failed every enabled feature is NaN.

    :param recording: 1-D ECG signal.
    :param sample_freq: sampling frequency in Hz.
    :param r_peaks: sample indices of R peaks (likewise s/q/p/t_peaks).
    :return: ``(feature_list, feature_name, [p, q, r, s, t])`` — feature
        values, their names, and the (possibly corrected) peak arrays.
    """

    def interval_calc_simple(first_peak, second_peak, sample_freq):
        """Mean/std of element-wise peak differences; NaN on any failure.

        NOTE(review): the difference is left in samples — ``sample_freq``
        is accepted but never used. Confirm whether seconds were intended.
        """
        try:
            mean_interval = round((second_peak - first_peak).mean(), 5)
        except Exception:
            mean_interval = float("NaN")
        try:
            std_interval = round((second_peak - first_peak).std(), 5)
        except Exception:
            std_interval = float("NaN")
        return mean_interval, std_interval

    feature_list = []
    feature_name = []

    # BUGFIX: the original guard was
    #   len(r) and len(q) and len(s) and len(p) and len(t) < 3
    # i.e. only t_peaks was actually compared against 3 — the other len()
    # calls were bare truthiness tests. Re-detect whenever ANY peak array
    # is too short to be useful.
    if min(len(r_peaks), len(q_peaks), len(s_peaks), len(p_peaks), len(t_peaks)) < 3:
        try:
            temp_data = nk.ecg_process(recording, sample_freq)[0]
            r_peaks = np.where(temp_data['ECG_R_Peaks'] == 1)[0]
            p_peaks = np.where(temp_data['ECG_P_Peaks'] == 1)[0]
            q_peaks = np.where(temp_data['ECG_Q_Peaks'] == 1)[0]
            s_peaks = np.where(temp_data['ECG_S_Peaks'] == 1)[0]
            t_peaks = np.where(temp_data['ECG_T_Peaks'] == 1)[0]
            p_onset = np.where(temp_data['ECG_P_Onsets'] == 1)[0]  # currently unused
            t_offset = np.where(temp_data['ECG_T_Offsets'] == 1)[0]  # currently unused
            clean_rec = temp_data['ECG_Clean']
            analysis = True
        except Exception:
            # Detection failed entirely: emit NaNs with placeholder peaks.
            analysis = False
            r_peaks = np.array([1, 2])
            p_peaks = np.array([1, 2])
            q_peaks = np.array([1, 2])
            s_peaks = np.array([1, 2])
            t_peaks = np.array([1, 2])
    else:
        analysis = True
        clean_rec = nk.ecg_clean(recording)

        def _correct(peaks):
            # Snap peaks to nearby local extrema; keep originals on failure.
            try:
                return processing.peaks.correct_peaks(
                    clean_rec, peaks, search_radius=25,
                    smooth_window_size=7, peak_dir='compare')
            except Exception:
                return peaks

        r_peaks = _correct(r_peaks)
        q_peaks = _correct(q_peaks)
        s_peaks = _correct(s_peaks)
        t_peaks = _correct(t_peaks)
        p_peaks = _correct(p_peaks)

    def _emit(enabled, names, compute):
        """Append the two feature names and their (lazily computed) values.

        The value callback runs only when ``analysis`` succeeded, so it may
        safely reference ``clean_rec``.
        """
        if not enabled:
            return
        feature_name.extend(names)
        if analysis:
            feature_list.extend(compute())
        else:
            feature_list.extend([float("nan"), float("nan")])

    _emit(self.rpeak_int, ("mean_rr_interval", "sd_rr_interval"),
          lambda: [(np.diff(r_peaks) / sample_freq).mean(),
                   (np.diff(r_peaks) / sample_freq).std()])
    _emit(self.rpeak_amp, ("mean_r_peak", "sd_r_peak"),
          lambda: [recording[r_peaks].mean(), recording[r_peaks].std()])
    _emit(self.ppeak_int, ("mean_pp_interval", "sd_pp_interval"),
          lambda: [(np.diff(p_peaks) / sample_freq).mean(),
                   (np.diff(p_peaks) / sample_freq).std()])
    _emit(self.ppeak_amp, ("mean_p_peak", "sd_p_peak"),
          lambda: [clean_rec[p_peaks].mean(), clean_rec[p_peaks].std()])
    _emit(self.tpeak_int, ("mean_tt_interval", "sd_tt_interval"),
          lambda: [(np.diff(t_peaks) / sample_freq).mean(),
                   (np.diff(t_peaks) / sample_freq).std()])
    _emit(self.tpeak_amp, ("mean_t_peak", "sd_t_peak"),
          lambda: [clean_rec[t_peaks].mean(), clean_rec[t_peaks].std()])
    _emit(self.qpeak_int, ("mean_qq_interval", "sd_qq_interval"),
          lambda: [(np.diff(q_peaks) / sample_freq).mean(),
                   (np.diff(q_peaks) / sample_freq).std()])
    _emit(self.qpeak_amp, ("mean_q_peak", "sd_q_peak"),
          lambda: [clean_rec[q_peaks].mean(), clean_rec[q_peaks].std()])
    # BUGFIX: the original labelled the S-S interval features
    # "mean_q_peak" / "sd_q_peak" (copy-paste error).
    _emit(self.speak_int, ("mean_ss_interval", "sd_ss_interval"),
          lambda: [(np.diff(s_peaks) / sample_freq).mean(),
                   (np.diff(s_peaks) / sample_freq).std()])
    _emit(self.speak_amp, ("mean_s_peak", "sd_s_peak"),
          lambda: [clean_rec[s_peaks].mean(), clean_rec[s_peaks].std()])
    _emit(self.qrs_duration, ("qrs_mean", "qrs_std"),
          lambda: list(interval_calc_simple(q_peaks, s_peaks, sample_freq)))
    _emit(self.qt_duration, ("qt_mean", "qt_std"),
          lambda: list(interval_calc_simple(q_peaks, t_peaks, sample_freq)))
    _emit(self.pr_duration, ("pr_mean", "pr_std"),
          lambda: list(interval_calc_simple(p_peaks, r_peaks, sample_freq)))

    feature_list = np.asarray(feature_list)
    feature_name = np.asarray(feature_name)
    return feature_list, feature_name, [
        p_peaks, q_peaks, r_peaks, s_peaks, t_peaks
    ]
def get_12ECG_features_labels(data, header_data):
    """Extract morphology/HRV features and the Dx label from one record.

    Only lead index 1 is analysed. Header parsing follows the
    PhysioNet/CinC Challenge .hea format: line 0 is
    ``<id> <n_leads> <fs> ...`` and each lead line carries the gain as
    ``<gain>/<units>``; #Age/#Sex/#Dx comment lines supply demographics.

    :param data: array of lead signals; ``data[1]`` is the analysed lead.
    :param header_data: list of header lines.
    :return: list of numeric features, with the Dx label as last element.
    """
    tmp_hea = header_data[0].split(' ')
    ptID = tmp_hea[0]
    num_leads = int(tmp_hea[1])
    sample_Fs = int(tmp_hea[2])
    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        tmp_hea = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(tmp_hea[2].split('/')[0])

    # for testing, we included the mean age of 57 if the age is a NaN
    # This value will change as more data is being released
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Sex'):
            tmp_sex = iline.split(': ')[1]
            sex = 1 if tmp_sex.strip() == 'Female' else 0
        elif iline.startswith('#Dx'):
            label = iline.split(': ')[1].split(',')[0]

    # We are only using data from lead 1
    signal = data[1]
    gain = gain_lead[1]

    # Dominant spectral frequency of the gained signal.
    N = len(signal)
    sp = sample_Fs / N  # spectral resolution (Hz per FFT bin)
    Y = np.fft.fft(signal * gain)
    # BUGFIX: np.linspace requires an integer number of samples (was N/2,
    # a float, which raises TypeError on modern NumPy).
    ff = np.linspace(0, (N / 2) * sp, N // 2).flatten()
    # BUGFIX: argmax instead of float(ff[np.where(... == max)]) — the
    # original raised when the maximum magnitude occurred in several bins.
    fmax = float(ff[np.argmax(np.abs(Y[0:N // 2]))])

    peaks, idx = detect_peaks(signal, sample_Fs, gain)
    # RR intervals expressed in milliseconds throughout.
    # mean
    mean_RR = np.mean(idx / sample_Fs * 1000)
    mean_R_Peaks = np.mean(peaks * gain)
    # median
    median_RR = np.median(idx / sample_Fs * 1000)
    median_R_Peaks = np.median(peaks * gain)
    # standard deviation
    std_RR = np.std(idx / sample_Fs * 1000)
    std_R_Peaks = np.std(peaks * gain)
    # variance
    var_RR = stats.tvar(idx / sample_Fs * 1000)
    var_R_Peaks = stats.tvar(peaks * gain)
    # skewness
    skew_RR = stats.skew(idx / sample_Fs * 1000)
    skew_R_Peaks = stats.skew(peaks * gain)
    # kurtosis
    kurt_RR = stats.kurtosis(idx / sample_Fs * 1000)
    kurt_R_Peaks = stats.kurtosis(peaks * gain)
    # RMSSD (HRV)
    rmssd = np.sqrt(np.mean(np.square(np.diff(idx))))

    # All peaks, via NeuroKit delineation of the cleaned signal.
    ecg_signal = nk.ecg_clean(signal * gain, sampling_rate=sample_Fs, method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    # BUGFIX: initialise the wave containers so a failed delineation no
    # longer leaves them undefined (previously a NameError below); the
    # derived means simply become NaN instead.
    t_peaks, p_peaks, q_peaks, s_peaks = [], [], [], []
    p_onsets, t_offsets = [], []
    try:
        signal_peak, waves_peak = nk.ecg_delineate(ecg_signal, rpeaks, sampling_rate=sample_Fs)
        t_peaks = waves_peak['ECG_T_Peaks']
        p_peaks = waves_peak['ECG_P_Peaks']
        q_peaks = waves_peak['ECG_Q_Peaks']
        s_peaks = waves_peak['ECG_S_Peaks']
        p_onsets = waves_peak['ECG_P_Onsets']
        t_offsets = waves_peak['ECG_T_Offsets']
    except ValueError:
        print('Exception raised!')

    # T peaks: drop NaNs and take the mean raw-signal amplitude.
    t_peaks = np.asarray(t_peaks, dtype=float)
    t_peaks = t_peaks[~np.isnan(t_peaks)]
    t_peaks = [int(a) for a in t_peaks]
    mean_T_Peaks = np.mean([signal[w] for w in t_peaks])
    # P peaks
    p_peaks = np.asarray(p_peaks, dtype=float)
    p_peaks = p_peaks[~np.isnan(p_peaks)]
    p_peaks = [int(a) for a in p_peaks]
    mean_P_Peaks = np.mean([signal[w] for w in p_peaks])
    # Q peaks
    q_peaks = np.asarray(q_peaks, dtype=float)
    q_peaks = q_peaks[~np.isnan(q_peaks)]
    q_peaks = [int(a) for a in q_peaks]
    mean_Q_Peaks = np.mean([signal[w] for w in q_peaks])
    # S peaks
    s_peaks = np.asarray(s_peaks, dtype=float)
    s_peaks = s_peaks[~np.isnan(s_peaks)]
    s_peaks = [int(a) for a in s_peaks]
    mean_S_Peaks = np.mean([signal[w] for w in s_peaks])
    # P onsets / T offsets, as mean times in milliseconds (NaNs propagate).
    p_onsets = np.asarray(p_onsets, dtype=float)
    mean_P_Onsets = np.mean(p_onsets / sample_Fs * 1000)
    t_offsets = np.asarray(t_offsets, dtype=float)
    mean_T_offsets = np.mean(t_offsets / sample_Fs * 1000)

    features = [age, sex, fmax, mean_RR, mean_R_Peaks, mean_T_Peaks, mean_P_Peaks,
                mean_Q_Peaks, mean_S_Peaks, median_RR, median_R_Peaks, std_RR,
                std_R_Peaks, var_RR, var_R_Peaks, skew_RR, skew_R_Peaks, kurt_RR,
                kurt_R_Peaks, mean_P_Onsets, mean_T_offsets, rmssd, label]
    return features
def get_HRVs_values(data, header_data):
    """Compute HRV time-domain features plus P-wave statistics for lead 1.

    Header parsing follows the PhysioNet/CinC Challenge .hea format (see
    ``get_12ECG_features_labels``).

    :param data: array of lead signals; ``data[1]`` is the analysed lead.
    :param header_data: list of header lines (.hea plus #Age/#Sex/#Dx).
    :return: the ``nk.hrv_time`` DataFrame row extended with
        ``var_P_time``, ``var_P_peaks``, ``age`` and ``label`` columns.
    """
    tmp_hea = header_data[0].split(' ')
    ptID = tmp_hea[0]
    num_leads = int(tmp_hea[1])
    sample_Fs = int(tmp_hea[2])
    gain_lead = np.zeros(num_leads)
    for ii in range(num_leads):
        tmp_hea = header_data[ii + 1].split(' ')
        gain_lead[ii] = int(tmp_hea[2].split('/')[0])

    # for testing, we included the mean age of 57 if the age is a NaN
    # This value will change as more data is being released
    for iline in header_data:
        if iline.startswith('#Age'):
            tmp_age = iline.split(': ')[1].strip()
            age = int(tmp_age if tmp_age != 'NaN' else 57)
        elif iline.startswith('#Sex'):
            tmp_sex = iline.split(': ')[1]
            sex = 1 if tmp_sex.strip() == 'Female' else 0
        elif iline.startswith('#Dx'):
            label = iline.split(': ')[1].split(',')[0]

    signal = data[1]
    gain = gain_lead[1]

    ecg_signal = nk.ecg_clean(signal * gain, sampling_rate=sample_Fs, method="biosppy")
    _, rpeaks = nk.ecg_peaks(ecg_signal, sampling_rate=sample_Fs)
    hrv_time = nk.hrv_time(rpeaks, sampling_rate=sample_Fs)

    peaks, idx = detect_peaks(signal, sample_Fs, gain)
    # BUGFIX: RR intervals in milliseconds are idx / fs * 1000; the original
    # divided by (fs * 1000). NOTE(review): this value is currently not
    # folded into any output column (the old rolling(3) result was never
    # aggregated and has been removed) — confirm whether it should be.
    rr_intervals = pd.Series(idx / sample_Fs * 1000)

    # BUGFIX: defined up-front so a failed delineation no longer raises
    # NameError below; the P statistics simply degrade instead.
    p_peaks = []
    try:
        signal_peak, waves_peak = nk.ecg_delineate(ecg_signal, rpeaks, sampling_rate=sample_Fs)
        p_peaks = waves_peak['ECG_P_Peaks']
    except ValueError:
        print('Exception raised!')

    p_peaks = np.asarray(p_peaks, dtype=float)
    p_peaks = p_peaks[~np.isnan(p_peaks)]
    p_peaks = [int(a) for a in p_peaks]
    p_time = [x / sample_Fs for x in p_peaks]  # P-peak times in seconds
    p_diff = np.diff(p_time)
    hrv_time['var_P_time'] = stats.tvar(p_diff)
    # dtype=int keeps the fancy indexing valid even when p_peaks is empty
    # (np.array([]) would otherwise be float and raise on indexing).
    hrv_time['var_P_peaks'] = stats.tvar(signal[np.array(p_peaks, dtype=int)])
    hrv_time['age'] = age
    hrv_time['label'] = label
    return hrv_time