def RR_to_features(heart_data):
    from hrvanalysis import get_frequency_domain_features, get_time_domain_features, get_poincare_plot_features
    # Convert the heart-rate list (beats per minute) into a list of RR intervals (ms)
    RR_interval = []
    for i in heart_data:
        RR_interval.append(60 * 1000 / i)
    # Compute the features
    features_1 = get_poincare_plot_features(RR_interval)
    SD1 = features_1['sd1']
    SD2 = features_1['sd2']
    features_2 = get_frequency_domain_features(RR_interval)
    LF = features_2['lf']
    HF = features_2['hf']
    LF_HF = features_2['lf_hf_ratio']
    HF_LF = 1 / LF_HF
    LF_NU = features_2['lfnu']
    HF_NU = features_2['hfnu']
    TP = features_2['total_power']
    VLF = features_2['vlf']
    features_3 = get_time_domain_features(RR_interval)
    pNN50 = features_3['pnni_50']
    RMSSD = features_3['rmssd']
    MEAN_RR = features_3['mean_nni']
    MEDIAN_RR = features_3['median_nni']
    HR = features_3['mean_hr']
    SDRR = features_3['sdnn']
    SDRR_RMSSD = SDRR / RMSSD
    SDSD = features_3['sdsd']
    row_list = [["MEAN_RR", "MEDIAN_RR", "SDRR", "RMSSD", "SDSD", "SDRR_RMSSD",
                 "HR", "pNN50", "SD1", "SD2", "VLF", "LF", "LF_NU", "HF", "HF_NU",
                 "TP", "LF_HF", "HF_LF"],
                [MEAN_RR, MEDIAN_RR, SDRR, RMSSD, SDSD, SDRR_RMSSD, HR, pNN50, SD1, SD2,
                 VLF, LF, LF_NU, HF, HF_NU, TP, LF_HF, HF_LF]]
    return row_list[1]
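# Worked example of the conversion above: a heart rate of 75 beats per minute corresponds to an
# RR interval of 60 * 1000 / 75 = 800 ms, and 80 bpm to 750 ms, so RR_to_features([75, 80, ...])
# feeds 800, 750, ... (milliseconds) into the hrvanalysis feature extractors.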
def get_all_features_hrva(data_sample, sample_rate=100, rpeak_method=0):
    """
    :param data_sample: raw signal to analyse
    :param sample_rate: sampling rate of the signal in Hz
    :param rpeak_method: peak-detection method; 1-4 select a PeakDetector variant,
        any other value falls back to the rolling-mean detector
    :return: time-domain, frequency-domain, geometrical and CSI/CVI feature dictionaries
    """
    if rpeak_method in [1, 2, 3, 4]:
        detector = PeakDetector()
        peak_list = detector.ppg_detector(data_sample, rpeak_method)[0]
    else:
        rol_mean = rolling_mean(data_sample, windowsize=0.75, sample_rate=100.0)
        peaks_wd = detect_peaks(data_sample, rol_mean, ma_perc=20, sample_rate=100.0)
        peak_list = peaks_wd["peaklist"]

    rr_list = np.diff(peak_list) * (1000 / sample_rate)  # convert to milliseconds
    nn_list = get_nn_intervals(rr_list)
    nn_list_non_na = np.copy(nn_list)
    nn_list_non_na[np.where(np.isnan(nn_list_non_na))[0]] = -1

    time_domain_features = get_time_domain_features(rr_list)
    frequency_domain_features = get_frequency_domain_features(rr_list)
    geometrical_features = get_geometrical_features(rr_list)
    csi_cvi_features = get_csi_cvi_features(rr_list)

    return time_domain_features, frequency_domain_features, \
        geometrical_features, csi_cvi_features
def calc_hrv_params(data, phase):
    params = {}

    # Delete 0-values from the dataset (prevents interpolation of 0-values)
    data = [i for i in data["IBI"] if i != 0]

    # Remove outlier data points using the hrv-analysis package https://github.com/Aura-healthcare/hrvanalysis
    rr_intervals_without_outliers = hrvanalysis.remove_outliers(rr_intervals=data, low_rri=350, high_rri=1800)
    # Interpolate outliers using the hrv-analysis package https://github.com/Aura-healthcare/hrvanalysis
    preprocessed_data = hrvanalysis.interpolate_nan_values(rr_intervals=rr_intervals_without_outliers,
                                                           interpolation_method='linear')
    # If the first or last data point is a NaN, it can't be interpolated and must be dropped
    cleaned_data = [i for i in preprocessed_data if not np.isnan(i)]

    # The HRV package calculates different heart-rate-related parameters (of which only the mean heart rate is
    # used as a stress marker in the present study; the full code to calculate all HRV parameters is listed below)
    # see https://github.com/Aura-healthcare/hrvanalysis

    # Time Domain Analysis
    hrv_time_domain = hrvanalysis.get_time_domain_features(cleaned_data)
    # Only get the mean heart rate parameter
    params.update({phase[3:] + "_mean_HR": hrv_time_domain["mean_hr"]})

    # Get all HRV time-domain parameters:
    # Mean_NNI, SDNN, SDSD, NN50, pNN50, NN20, pNN20, RMSSD, Median_NN,
    # Range_NN, CVSD, CV_NNI, Mean_HR, Max_HR, Min_HR, STD_HR
    # for key in hrv_time_domain.keys():
    #     params.update({phase[3:] + "_" + key: hrv_time_domain[key]})

    # Frequency Domain Analysis
    # hrv_frequency_domain = hrvanalysis.get_frequency_domain_features(cleaned_data, method='welch',
    #                                                                  sampling_frequency=4,
    #                                                                  interpolation_method='cubic',
    #                                                                  vlf_band=(0.003, 0.04), lf_band=(0.04, 0.15),
    #                                                                  hf_band=(0.15, 0.4))
    # Get all HRV frequency-domain parameters:
    # LF, HF, VLF, LF/HF ratio, LFnu, HFnu, Total_Power
    # for key in hrv_frequency_domain.keys():
    #     params.update({phase[2:] + "_" + key: hrv_frequency_domain[key]})

    # Geometrical Analysis
    # hrv_geometrical_features = hrvanalysis.extract_features.get_geometrical_features(cleaned_data)
    # Get all geometrical-analysis parameters:
    # Triangular_index, TINN
    # for key in hrv_geometrical_features.keys():
    #     params.update({phase[2:] + "_" + key: hrv_geometrical_features[key]})

    # CSI/CVI analysis
    # hrv_csi_cvi_features = hrvanalysis.extract_features.get_csi_cvi_features(cleaned_data)
    # Get all CSI/CVI analysis parameters:
    # CSI, CVI, Modified_CSI, SD1, SD2, SD1/SD2 ratio, SampEn
    # for key in hrv_csi_cvi_features.keys():
    #     params.update({phase[2:] + "_" + key: hrv_csi_cvi_features[key]})

    return params
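# A minimal sketch of the full hrvanalysis cleaning chain referenced above (outlier removal,
# NaN interpolation, ectopic-beat removal) before feature extraction. The RR values and the
# 300/2000 ms thresholds below are illustrative assumptions, not values taken from the study code.
example_rr = [810, 795, 2500, 802, 815, 790]  # raw RR intervals in ms, with one obvious outlier
example_rr = hrvanalysis.remove_outliers(rr_intervals=example_rr, low_rri=300, high_rri=2000)
example_rr = hrvanalysis.interpolate_nan_values(rr_intervals=example_rr, interpolation_method="linear")
example_rr = hrvanalysis.remove_ectopic_beats(rr_intervals=example_rr, method="malik")
example_nn = hrvanalysis.interpolate_nan_values(rr_intervals=example_rr)
example_features = hrvanalysis.get_time_domain_features(example_nn)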
def get_feature_names(recording):
    time_domain_features = hrv.get_time_domain_features(recording["Recording"]["RrInterval"])
    geometrical_features = hrv.get_geometrical_features(recording["Recording"]["RrInterval"])
    frequency_domain_features = hrv.get_frequency_domain_features(recording["Recording"]["RrInterval"])
    csi_cvi_features = hrv.get_csi_cvi_features(recording["Recording"]["RrInterval"])
    poincare_plot_features = hrv.get_poincare_plot_features(recording["Recording"]["RrInterval"])

    feature_dictionary = {
        **time_domain_features,
        **geometrical_features,
        **frequency_domain_features,
        **csi_cvi_features,
        **poincare_plot_features
    }

    return [key for key in feature_dictionary.keys()]
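# Hypothetical usage sketch: the recording structure below is an assumption that mirrors the keys
# accessed above, with RR intervals in milliseconds under recording["Recording"]["RrInterval"].
# A few hundred synthetic intervals are used so that the frequency and geometrical features are defined.
example_recording = {"Recording": {"RrInterval": [800 + (i % 7) * 5 for i in range(300)]}}
print(get_feature_names(example_recording))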
def compute_short_term_features_on_interval(features, i, rr_timestamps, rrs):
    # Adding indexes
    features[i][FEATURES_KEY_TO_INDEX["interval_index"]] = i
    features[i][FEATURES_KEY_TO_INDEX["interval_start_time"]] = i * SHORT_WINDOW

    rrs_on_interval = get_rr_intervals_on_window(rr_timestamps, rrs, i * SHORT_WINDOW, SHORT_WINDOW)
    if len(rrs_on_interval) == 0:
        raise ValueError("No RR intervals")

    clean_rrs = get_clean_intervals(rrs_on_interval)
    time_domain_features = get_time_domain_features(clean_rrs)
    for key in time_domain_features.keys():
        features[i][FEATURES_KEY_TO_INDEX[key]] = time_domain_features[key]
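# get_rr_intervals_on_window and get_clean_intervals are project helpers that are not shown here.
# Below is an illustrative guess at what a cleaning helper like get_clean_intervals could look like,
# built from the hrvanalysis utilities used elsewhere in this file; it is not the project's actual code.
def get_clean_intervals_sketch(rrs):
    from hrvanalysis import remove_outliers, remove_ectopic_beats, interpolate_nan_values
    # Drop physiologically implausible intervals, interpolate the gaps,
    # then remove ectopic beats and interpolate again.
    rrs = remove_outliers(rr_intervals=rrs, low_rri=300, high_rri=2000)
    rrs = interpolate_nan_values(rr_intervals=rrs, interpolation_method="linear")
    rrs = remove_ectopic_beats(rr_intervals=rrs, method="malik")
    return interpolate_nan_values(rr_intervals=rrs)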
def get_all_features_hrva(s, sample_rate=100, rpeak_method=0, wave_type='ecg'):
    """
    Parameters
    ----------
    s :
        Raw signal
    sample_rate :
        Sampling rate of the signal in Hz (Default value = 100)
    rpeak_method :
        Detector type passed to PeakDetector (Default value = 0)
    wave_type :
        Either 'ecg' or 'ppg'; selects which detector is used (Default value = 'ecg')

    Returns
    -------
    time_domain_features, frequency_domain_features, geometrical_features, csi_cvi_features
    """
    # if rpeak_method in [1, 2, 3, 4]:
    #     detector = PeakDetector()
    #     peak_list = detector.ppg_detector(data_sample, rpeak_method)[0]
    # else:
    #     rol_mean = rolling_mean(data_sample, windowsize=0.75, sample_rate=100.0)
    #     peaks_wd = detect_peaks(data_sample, rol_mean, ma_perc=20,
    #                             sample_rate=100.0)
    #     peak_list = peaks_wd["peaklist"]
    if wave_type == 'ppg':
        detector = PeakDetector(wave_type='ppg')
        peak_list, trough_list = detector.ppg_detector(s, detector_type=rpeak_method)
    else:
        detector = PeakDetector(wave_type='ecg')
        peak_list, trough_list = detector.ecg_detector(s, detector_type=rpeak_method)

    rr_list = np.diff(peak_list) * (1000 / sample_rate)  # convert to milliseconds
    nn_list = get_nn_intervals(rr_list)
    nn_list_non_na = np.copy(nn_list)
    nn_list_non_na[np.where(np.isnan(nn_list_non_na))[0]] = -1

    time_domain_features = get_time_domain_features(rr_list)
    frequency_domain_features = get_frequency_domain_features(rr_list)
    geometrical_features = get_geometrical_features(rr_list)
    csi_cvi_features = get_csi_cvi_features(rr_list)

    return time_domain_features, frequency_domain_features, geometrical_features, csi_cvi_features
def recording_to_x_y_feature_regression(recording):
    time_domain_features = hrv.get_time_domain_features(recording["Recording"]["RrInterval"])
    geometrical_features = hrv.get_geometrical_features(recording["Recording"]["RrInterval"])
    frequency_domain_features = hrv.get_frequency_domain_features(recording["Recording"]["RrInterval"])
    csi_cvi_features = hrv.get_csi_cvi_features(recording["Recording"]["RrInterval"])
    poincare_plot_features = hrv.get_poincare_plot_features(recording["Recording"]["RrInterval"])

    feature_dictionary = {
        **time_domain_features,
        **geometrical_features,
        **frequency_domain_features,
        **csi_cvi_features,
        **poincare_plot_features
    }

    x = [value for value in feature_dictionary.values()]
    y = decade_to_label(recording["AgeDecade"], False)

    return [y] + x
def RR_to_features(heart_data):
    print(heart_data)
    from hrvanalysis import get_frequency_domain_features, get_time_domain_features, get_poincare_plot_features
    # Convert the heart-rate list (beats per minute) into a list of RR intervals (ms)
    RR_interval = []
    for i in heart_data:
        RR_interval.append(60 * 1000 / int(i))
    # Compute the features
    features_1 = get_poincare_plot_features(RR_interval)
    SD1 = features_1['sd1']
    SD2 = features_1['sd2']
    features_2 = get_frequency_domain_features(RR_interval)
    LF = features_2['lf']
    HF = features_2['hf']
    LF_HF = features_2['lf_hf_ratio']
    HF_LF = 1 / LF_HF
    LF_NU = features_2['lfnu']
    HF_NU = features_2['hfnu']
    TP = features_2['total_power']
    VLF = features_2['vlf']
    features_3 = get_time_domain_features(RR_interval)
    pNN50 = features_3['pnni_50']
    RMSSD = features_3['rmssd']
    MEAN_RR = features_3['mean_nni']
    MEDIAN_RR = features_3['median_nni']
    HR = features_3['mean_hr']
    SDRR = features_3['sdnn']
    SDRR_RMSSD = SDRR / RMSSD
    SDSD = features_3['sdsd']
    row_list = [["MEAN_RR", "MEDIAN_RR", "SDRR", "RMSSD", "SDSD", "SDRR_RMSSD",
                 "HR", "pNN50", "SD1", "SD2", "VLF", "LF", "LF_NU", "HF", "HF_NU",
                 "TP", "LF_HF", "HF_LF"],
                [MEAN_RR, MEDIAN_RR, SDRR, RMSSD, SDSD, SDRR_RMSSD, HR, pNN50, SD1, SD2,
                 VLF, LF, LF_NU, HF, HF_NU, TP, LF_HF, HF_LF]]
    with open('service/data/final/data_temp.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(row_list)
    return row_list[1]
def hrvAnalysis(self, times, samples, rrTimes, rrValues):
    def listSecToMsec(secs):
        msecs = []
        for i in range(len(secs)):
            msecs.append(int(secs[i] * 1000))
        return msecs

    def listMsecToSec(msecs):
        secs = []
        for i in range(len(msecs)):
            secs.append(float(msecs[i]) / 1000)
        return secs

    rrValuesMsec = listSecToMsec(rrValues)

    results = {}
    results['time_domain'] = get_time_domain_features(rrValuesMsec)
    results['freq_domain'] = get_frequency_domain_features(rrValuesMsec)
    results['poincare_plot'] = get_poincare_plot_features(rrValuesMsec)
    return results
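# An equivalent, more compact version of the listSecToMsec loop above using numpy (a sketch,
# assuming the input is a sequence of positive RR intervals expressed in seconds):
import numpy as np

def list_sec_to_msec_np(secs):
    # Multiply by 1000 and truncate toward zero, matching int(sec * 1000) for positive values
    return (np.asarray(secs, dtype=float) * 1000).astype(int).tolist()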
def getSingleIBIfeatures(data):
    """
    INPUT:
        data: Dataframe of IBI values mapped to timestamps
    OUTPUT:
        A single IBI feature vector
    For more information:
        https://aura-healthcare.github.io/hrvanalysis/hrvanalysis.html
    """
    if data.empty:
        return None

    IBI_data = data['IBI'].astype(float) * 1000
    # This removes ectopic beats from the signal
    nn_intervals_list = remove_ectopic_beats(rr_intervals=IBI_data, method="malik")
    # This replaces ectopic-beat nan values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(rr_intervals=nn_intervals_list)

    if not interpolated_nn_intervals[-1] > 1 and len(interpolated_nn_intervals) == 2:
        interpolated_nn_intervals[-1] = interpolated_nn_intervals[0]
    if not interpolated_nn_intervals[-1] > 1:
        interpolated_nn_intervals[-1] = np.median(interpolated_nn_intervals[1:-1])
    if not interpolated_nn_intervals[0] > 1:
        interpolated_nn_intervals[0] = np.median(interpolated_nn_intervals[1:-1])

    # Get features
    time_features = get_time_domain_features(interpolated_nn_intervals)
    freq_features = get_frequency_domain_features(interpolated_nn_intervals, method='lomb')

    IBI_features_df = pd.DataFrame({**time_features, **freq_features}, index=[0])
    # IBI_features_df.insert(0, "participant", participant)
    return IBI_features_df
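# Hypothetical usage sketch: build a small DataFrame of inter-beat intervals (in seconds) and extract
# one feature row. The synthetic data and column names below are assumptions that mirror how the
# function reads data['IBI'] above; about two minutes of beats near 800 ms are simulated.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
ibi_sec = 0.8 + rng.normal(0, 0.02, size=120)
example_ibi = pd.DataFrame({"timestamp": np.cumsum(ibi_sec), "IBI": ibi_sec})
single_row = getSingleIBIfeatures(example_ibi)
print(single_row[["mean_nni", "rmssd", "lf", "hf", "lf_hf_ratio"]])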
def RR_to_features(RR_interval):
    features_1 = get_poincare_plot_features(RR_interval)
    SD1 = features_1['sd1']
    SD2 = features_1['sd2']
    features_2 = get_frequency_domain_features(RR_interval)
    LF = features_2['lf']
    HF = features_2['hf']
    LF_HF = features_2['lf_hf_ratio']
    HF_LF = 1 / LF_HF
    LF_NU = features_2['lfnu']
    HF_NU = features_2['hfnu']
    TP = features_2['total_power']
    VLF = features_2['vlf']
    features_3 = get_time_domain_features(RR_interval)
    pNN50 = features_3['pnni_50']
    RMSSD = features_3['rmssd']
    MEAN_RR = features_3['mean_nni']
    MEDIAN_RR = features_3['median_nni']
    HR = features_3['mean_hr']
    SDRR = features_3['sdnn']
    SDRR_RMSSD = SDRR / RMSSD
    SDSD = features_3['sdsd']

    import csv
    row_list = [[
        "MEAN_RR", "MEDIAN_RR", "SDRR", "RMSSD", "SDSD", "SDRR_RMSSD", "HR",
        "pNN50", "SD1", "SD2", "VLF", "LF", "LF_NU", "HF", "HF_NU", "TP",
        "LF_HF", "HF_LF"
    ], [
        MEAN_RR, MEDIAN_RR, SDRR, RMSSD, SDSD, SDRR_RMSSD, HR, pNN50, SD1, SD2,
        VLF, LF, LF_NU, HF, HF_NU, TP, LF_HF, HF_LF
    ]]
    with open('data/final/data_user.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(row_list)
def metrics(self):
    import pyhrv.tools as tools

    intervalosNN = tools.nn_intervals(self.peaks_fpos)
    time_domain_features = get_time_domain_features(intervalosNN[intervalosNN != 0])
    frequency_domain_features = get_frequency_domain_features(intervalosNN)
    geometrical_features = get_geometrical_features(intervalosNN)

    self.ui.scrollAreaFeatures.setStyleSheet('background-color: white')

    def add_section_header(text, target_layout):
        # Section title spanning the feature panel
        layout = QHBoxLayout()
        label = QLabel(text)
        label.setAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignVCenter)
        label.setStyleSheet("color: rgb(59,59,59)")
        label.setMaximumWidth(290)
        layout.addWidget(label)
        target_layout.addLayout(layout)

    def add_feature_values(features, target_layout, fmt="{:.4f}"):
        # One labelled value box per feature
        for key, value in features.items():
            layout = QVBoxLayout()
            label = QLabel('<h4>' + str(key) + ':</h4>')
            label.setStyleSheet("color: rgb(59,59,59)")
            label.setAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignVCenter)
            label.setMaximumWidth(122)
            layout.addWidget(label)
            value_label = QLabel(fmt.format(value) if fmt else str(value))
            value_label.setStyleSheet(
                "padding: 5px; border: 1px solid #cccccc; border-radius: 5px; background-color:#cccccc;"
            )
            value_label.setAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignVCenter)
            value_label.setMaximumWidth(122)
            layout.addWidget(value_label)
            target_layout.addLayout(layout)

    # Time domain features
    add_section_header('<h3>Time Domain Features</h3>', self.ui.verticalLayout_features)
    time_domain_features_to_show = {
        'SDNN': time_domain_features['sdnn'],
        'SDSD': time_domain_features['sdsd'],
        'SDANN': self.sdann,
        'RMSSD': time_domain_features['rmssd']
    }
    time_domain_features_to_show2 = {
        'NN20 Count': time_domain_features['nni_20'],
        'NN50 Count': time_domain_features['nni_50'],
        'PNN50 Count': time_domain_features['pnni_50'],
        'PNN20 Count': time_domain_features['pnni_20']
    }
    add_feature_values(time_domain_features_to_show, self.ui.verticalLayout_features1)
    add_feature_values(time_domain_features_to_show2, self.ui.verticalLayout_features2)

    # Frequency domain features
    frequency_domain_features_to_show = {
        'LF': frequency_domain_features['lf'],
        'HF': frequency_domain_features['hf'],
        'VLF': frequency_domain_features['vlf']
    }
    frequency_domain_features_to_show2 = {
        'LF norm': frequency_domain_features['lfnu'],  # was mistakenly mapped to 'vlf'
        'HF norm': frequency_domain_features['hfnu'],
        'Total power': frequency_domain_features['total_power']
    }
    add_section_header('<h3>Frequency Domain Features</h3>', self.ui.verticalLayout_features3)
    add_feature_values(frequency_domain_features_to_show, self.ui.verticalLayout_features4)
    add_feature_values(frequency_domain_features_to_show2, self.ui.verticalLayout_features5)

    # Geometrical features
    geometrical_features_to_show = {'TINN': geometrical_features['tinn']}
    geometrical_features_to_show2 = {
        'Triangular Index': geometrical_features['triangular_index']
    }
    add_section_header('<h3>Geometrical Domain Features</h3>', self.ui.verticalLayout_features6)
    add_feature_values(geometrical_features_to_show2, self.ui.verticalLayout_features7)
    # TINN is displayed as-is (it may be None), rather than numerically formatted
    add_feature_values(geometrical_features_to_show, self.ui.verticalLayout_features8, fmt=None)

    from hrvanalysis import plot_psd
def getIBIfeatures(data, time_window): """ INPUT: data: Dataframe of IBI values mapped to timestamps OUTPUT: IBI features For more information: https://aura-healthcare.github.io/hrvanalysis/hrvanalysis.html """ timestamp = data.timestamp.values IBI_data = np.array(data['IBI'].astype(float) * 1000) time_features_nn = np.zeros((1, 16)) freq_features_nn = np.zeros((1, 7)) timestamps = [0] for t in timestamp: if t >= timestamp[-1] - time_window: break curr_time = round(t + time_window) if curr_time in timestamps: continue timestamps.append(pd.to_datetime(curr_time, unit='s')) index_less = timestamp <= (t + time_window) index_larger = timestamp >= t index = index_less & index_larger curr_rr_interval = IBI_data[index] # This remove ectopic beats from signal nn_intervals_list = remove_ectopic_beats(rr_intervals=curr_rr_interval, method="malik") # This replace ectopic beats nan values with linear interpolation interpolated_nn_intervals = interpolate_nan_values( rr_intervals=nn_intervals_list) if not interpolated_nn_intervals[-1] > 1 and len( interpolated_nn_intervals) == 2: interpolated_nn_intervals[-1] = interpolated_nn_intervals[0] if not interpolated_nn_intervals[-1] > 1: interpolated_nn_intervals[-1] = np.median( interpolated_nn_intervals[1:-1]) if not interpolated_nn_intervals[0] > 1: interpolated_nn_intervals[0] = np.median( interpolated_nn_intervals[1:-1]) time_domain_features = get_time_domain_features( interpolated_nn_intervals) time_features_nn = np.vstack( (time_features_nn, np.array([ time_domain_features['mean_nni'], time_domain_features['sdnn'], time_domain_features['sdsd'], time_domain_features['nni_50'], time_domain_features['pnni_50'], time_domain_features['nni_20'], time_domain_features['pnni_20'], time_domain_features['rmssd'], time_domain_features['median_nni'], time_domain_features['range_nni'], time_domain_features['cvsd'], time_domain_features['cvnni'], time_domain_features['mean_hr'], time_domain_features['max_hr'], time_domain_features['min_hr'], time_domain_features['std_hr'] ]).reshape(1, 16))) freq_domain_features = get_frequency_domain_features( interpolated_nn_intervals, method='lomb') freq_features_nn = np.vstack( (freq_features_nn, np.array([ freq_domain_features['lf'], freq_domain_features['hf'], freq_domain_features['lf_hf_ratio'], freq_domain_features['lfnu'], freq_domain_features['hfnu'], freq_domain_features['total_power'], freq_domain_features['vlf'] ]).reshape(1, 7))) IBI_features = np.hstack((np.array(timestamps[1:]).reshape( (-1, 1)), time_features_nn[1:, :], freq_features_nn[1:, :])) IBI_features_df = pd.DataFrame( IBI_features, columns=[ 'timestamp', 'mean_nni', 'sdnn', 'sdsd', 'nni_50', 'pnni_50', 'nni_20', 'pnni_20', 'rmssd', 'median_nni', 'range_nni', 'cvsd', 'cvnni', 'mean_hr', 'max_hr', 'min_hr', 'std_hr', 'lf', 'hf', 'lf_hf_ratio', 'lfnu', 'hfnu', 'total_power', 'vlf' ]) # IBI_features_df.insert(0, "participant", participant) return IBI_features_df
def compute_time_features(record):
    # Dictionary with parameter names as keys
    record.time_parameters = get_time_domain_features(record.nn_ints)
    data = eval(f.readline())
    labels = eval(f.readline())
    labels_reversed = eval(f.readline())
    return data, labels, labels_reversed


start_time = time.time()
data, labels, labels_reversed = read_data()

case = 1
if case == 1:
    lists = {0: ["N"], 1: ["A"]}
elif case == 2:
    lists = {0: ["A"], 1: ["N", "O", "~"]}

df = None
for key in lists:
    for category in lists[key]:
        for id_ in labels_reversed[category]:
            if len(data[id_]) > 5:
                features = get_time_domain_features(data[id_])
                if df is None:
                    cols = sorted(features) + ["class"]
                    df = pd.DataFrame(columns=cols)
                features["class"] = key
                df = df.append(features, ignore_index=True)

df.to_csv("case_{}.csv".format(case))
print(time.time() - start_time)
print("ecg time", time) print("ecg new points", points) #downsampling resampled_signal = scipy.signal.resample(ecg, points) print("ecg resampled len", len(resampled_signal)) #r peak detector detectors = Detectors(256) r_peaks = detectors.engzee_detector(resampled_signal[0:34304]) rr = np.diff(r_peaks) ''' #r peak plot print(r_peaks) plt.figure() plt.plot(ecg[0:2560]) plt.plot(r_peaks, ecg[r_peaks], 'ro') plt.title('Detected R-peaks') plt.savefig('new_downsampled_rpeaks.png', dpi=300) #plot for 5 seconds (2500 points) ''' #HRV time domain parameters time_domain_features = get_time_domain_features(r_peaks) print(time_domain_features) #HRV frequency domain parameters plot_psd(rr, method="welch") plt.show()
def extract_features(data): """ Extract features from heart rate data Parameters ---------- data: dict with key t and hr data['t'] contains a numpy array indicating time in minutes data['hr'] contains a numpy array with the same size of data['t'] indicating heart rate in beats per second Returns ------- df: pandas DataFrame Features in time and frequency domains for each minute """ fs_new = 2.4 # optimized from hyper-parameter tuning thres = 0.015 # optimized from hyper-parameter tuning df = pd.DataFrame() t_hr, hr = data["t"], data["hr"] t_hr, hr_smth = smooth_hr(t_hr, hr) total_minute = int(t_hr[-1] - t_hr[0]) # Resample data for frequency-domain analysis t_interp = np.arange(t_hr[0], t_hr[-1], 1 / fs_new / 60) hr_interp = np.interp(t_interp, t_hr, hr_smth) # Extract features from each segment for minute in range(total_minute - 4): fea_dict = {} idx_1min = (t_hr > minute + 2) & (t_hr < minute + 3) idx_5min = (t_hr > minute) & (t_hr < minute + 5) data_1min, data_5min = hr_smth[idx_1min], hr_smth[idx_5min] hr_interp_1min = hr_interp[(t_interp > minute + 2) & (t_interp < minute + 3)] hr_interp_5min = hr_interp[(t_interp > minute) & (t_interp < minute + 5)] # Discard segment if less than 30 heart beats detected if len(data_1min) < 30: continue # Time-domain features for data_1min md = np.median(data_1min) fea_dict.update({ "md_1min": md, "min_r_1min": data_1min.min() - md, "max_r_1min": data_1min.max() - md, "p25_r_1min": np.percentile(data_1min, 0.25) - md, "p75_r_1min": np.percentile(data_1min, 0.75) - md, "mean_r_1min": data_1min.mean() - md, "std_1min": data_1min.std(), "acf1_1min": pd.Series(hr_interp_1min).autocorr(12), "acf2_1min": pd.Series(hr_interp_1min).autocorr(24), }) # Time-domain features for data_5min md = np.median(data_5min) fea_dict.update({ "md_5min": md, "min_r_5min": data_5min.min() - md, "max_r_5min": data_5min.max() - md, "p25_r_5min": np.percentile(data_5min, 0.25) - md, "p75_r_5min": np.percentile(data_5min, 0.75) - md, "mean_r_5min": data_5min.mean() - md, "std_5min": data_5min.std(), "acf1_5min": pd.Series(hr_interp_5min).autocorr(12), "acf2_5min": pd.Series(hr_interp_5min).autocorr(24), }) # Heart rate variability for data_1min nn_intervals = (np.diff(t_hr[idx_1min]) * 1000 * 60).astype( int) # Unit in ms time_domain_features = get_time_domain_features(nn_intervals) time_domain_features = { f"{key}_1min": value for key, value in time_domain_features.items() } nonlinear_features = get_csi_cvi_features(nn_intervals) nonlinear_features = { f"{key}_1min": value for key, value in nonlinear_features.items() } fea_dict.update(time_domain_features) fea_dict.update(nonlinear_features) # Heart rate variability for data_5min nn_intervals = (np.diff(t_hr[idx_5min]) * 1000 * 60).astype( int) # Unit in ms time_domain_features = get_time_domain_features(nn_intervals) time_domain_features = { f"{key}_5min": value for key, value in time_domain_features.items() } nonlinear_features = get_csi_cvi_features(nn_intervals) nonlinear_features = { f"{key}_5min": value for key, value in nonlinear_features.items() } fea_dict.update(time_domain_features) fea_dict.update(nonlinear_features) # Frequency-domain features freqs, psd = signal.periodogram(hr_interp_5min, fs=fs_new) psd[freqs > 0.1] = 0 fea_dict.update({ "peak": psd.max(), "f_peak": freqs[np.argmax(psd)], "area_total": psd.sum(), "area_lf": psd[freqs < thres].sum(), "area_hf": psd[freqs > thres].sum(), "area_ratio": psd[freqs > thres].sum() / psd[freqs < thres].sum(), }) df = df.append(fea_dict, ignore_index=True) 
    df.dropna(inplace=True)
    return df
def main(): print() print('***************By Killer Queen***************') # configs: X_train_dir = './data/x_train.npy' y_train_dir = './data/y_train.npy' X_test_dir = './data/x_test.npy' # data_x, data_y, data_x_test = load_data(x_path=X_train_dir, y_path=y_train_dir, x_test_path=X_test_dir) x_train = np.load(X_train_dir) y_train = np.load(y_train_dir) x_test = np.load(X_test_dir) index = np.arange(len(x_train)) np.random.shuffle(index) x_train = x_train[index] y_train = y_train[index] """ for j in range(4): cnt = 0 for i in range(len(x_train)): if cnt > 30: break if y_train[i] == j: plt.clf() plt.plot(x_train[i][:3000]) plt.ylabel('class {}'.format(j)) fig = plt.gcf() fig.savefig('./visualization/{}_{}_0.png'.format(j, cnt)) cnt+=1 """ """ d1 = x_train[y_train==0] d2 = x_train[y_train==1] d3 = x_train[y_train==2] d4 = x_train[y_train==3] ds = [d1, d2, d3, d4] std_lists = [[], [], [], []] for i in range(4): print("#Class {}: {}".format(i, len(ds[i]))) ave_max = 0 ave_min = 0 ave_mean = 0 ave_std = 0 for sample in ds[i]: sample = sample[~np.isnan(sample)] ave_max += np.max(sample) ave_min += np.min(sample) ave_mean += np.mean(sample) ave_std += np.std(sample) std_lists[i].append(np.std(sample)) print("ave max: {}, ave min: {}, ave mean: {}, ave std: {}".format( ave_max/len(ds[i]), ave_min / len(ds[i]), ave_mean / len(ds[i]), ave_std / len(ds[i]), )) plt.clf() plt.hist(np.array(std_lists[i]), bins=40) plt.title('class {} std'.format(i)) plt.savefig('./visualization/class_{}_std_distribution.png'.format(i)) plt.show() """ sample_rate = 300 cnt_max = 30 for j in range(4): cnt = 1 wavelet_energy = np.zeros((cnt_max, 6)) for i in range(len(x_train)): if cnt > cnt_max: break if y_train[i] == j: x = x_train[i] x = x[~np.isnan(x)] sample_count = len(x) t = np.linspace(0, sample_count / sample_rate, sample_count) xFFT = np.abs(np.fft.rfft(x) / sample_count) xFreqs = np.linspace(0, sample_rate / 2, int(sample_count / 2) + 1) plt.clf() plt.figure(figsize=(10, 6)) ax0 = plt.subplot(211) ax0.set_xlabel('Time(s)') ax0.set_ylabel("Amp") ax0.plot(t, x) ax1 = plt.subplot(212) ax1.set_xlabel('Freq(Hz)') ax1.set_ylabel('Power') ax1.plot(xFreqs, xFFT) plt.title('class {}'.format(j)) fig = plt.gcf() fig.savefig('./visualization/{}_{}_fft.png'.format(j, cnt)) #b, a = butter_band_pass_filter(3, 60, sample_rate, order=4) #x = signal.lfilter(b, a, x) out = ecg.ecg(x, sampling_rate=300, show=False) S_pint, Q_point = QS_detect(out[1], 300, out[2], False) time_domain_features = get_time_domain_features(out[2]) templates = out[4] plot_ecg(out[0], x, out[1], out[2], out[3], out[4], out[5], out[6], path='./visualization/{}_{}_lib.png'.format(j, cnt), show=False) coeffs = wavelet_decomposition(np.array(out[1])) a = coeffs[0] #for k, coeff in enumerate(coeffs): # b = coeffs[coeff] # b = b/np.max(np.abs(b)) # wavelet_energy[cnt-1][k] = np.mean(b * b) #_ = wavelet_reconstruction(coeffs, np.array(out[1]), 1, True) print(cnt) cnt += 1 print('Class {}'.format(j)) print('mean:') print(np.mean(wavelet_energy, axis=0)) print('std:') print(np.std(wavelet_energy, axis=0))
peaks = peaks[0]

file = open('peaks.csv', 'w')
write = csv.writer(file)
for x in peaks:
    file.write(str(x) + '\n')

RR_intervals = []
x = len(peaks) - 1
i = 0
while i < x:
    RR_intervals.append(peaks[i + 1] - peaks[i])
    i += 1

# Filter any extremes
RR_intervals = [x * 5 for x in RR_intervals if x < 300]
average_size = sum(RR_intervals) / len(RR_intervals)
RR_intervals = [x for x in RR_intervals
                if x > (average_size - (average_size * 0.15))
                and x < (average_size + (average_size * 0.15))]

file = open('RR.csv', 'w')
write = csv.writer(file)
for x in RR_intervals:
    file.write(str(x) + '\n')

time_domain_features = get_time_domain_features(RR_intervals)
print(time_domain_features)
# plot_poincare(RR_intervals, plot_sd_features=True)
file.close()
while (i<p-1): num[i] = (Sampling_rate/(arr[i+1]-arr[i])*60) print (num[i]) i=i+1 working_data, measures = hp.process(y, Sampling_rate, report_time=True) print('breathing rate is: %s Hz' %measures['breathingrate']) qwe=rms i=len(rms) rms[i-1]=rms[i-2] i=0 while (i<p): rms[i]=rms[i]*1000 i=i+1 time_domain_features = get_time_domain_features(rms) print (time_domain_features) #to calculate LF/HF ratio #LF band is the frequency band between 0.04-0.15 Hz #HF band is the frequency band between 0.15-0.4 Hz # Named Tuple for different frequency bands VlfBand = namedtuple("Vlf_band", ["low", "high"]) LfBand = namedtuple("Lf_band", ["low", "high"]) HfBand = namedtuple("Hf_band", ["low", "high"]) def plot_psd(nn_intervals: List[float], method: str = "welch", sampling_frequency: int = 7, interpolation_method: str = "linear", vlf_band: namedtuple = VlfBand(0.003, 0.04), lf_band: namedtuple = LfBand(0.04, 0.15), hf_band: namedtuple = HfBand(0.15, 0.40)):
def extract_feature(data, raw_signal): feature = [] ts = np.array(data['ts']) filtered = np.array(data['filtered']) rpeaks = np.array(data['rpeaks']) templates_ts = np.array(data['templates_ts']) templates = np.array(data['templates']) heart_rate_ts = np.array(data['heart_rate_ts']) heart_rate = np.array(data['heart_rate']) s_points = np.array(data['s_points']) q_points = np.array(data['q_points']) feature.append(np.std(raw_signal)) feature.append(np.std(filtered)) # RR interval rr_intervals = rpeaks[1:] - rpeaks[:-1] feature.append(np.min(rr_intervals)) feature.append(np.max(rr_intervals)) feature.append(np.mean(rr_intervals)) feature.append(np.std(rr_intervals)) # R amplitude r_apt = np.abs(filtered[rpeaks]) feature.append(np.min(r_apt)) feature.append(np.max(r_apt)) feature.append(np.mean(r_apt)) feature.append(np.std(r_apt)) # Q amplitude q_apt = np.abs(filtered[q_points]) feature.append(np.min(q_apt)) feature.append(np.max(q_apt)) feature.append(np.mean(q_apt)) feature.append(np.std(q_apt)) # QRS duration qrs_duration = s_points - q_points feature.append(np.min(qrs_duration)) feature.append(np.max(qrs_duration)) feature.append(np.mean(qrs_duration)) feature.append(np.std(qrs_duration)) interpolated_nn_intervals = rr_intervals * 10 / 3 time_domain_feature = hrvanalysis.get_time_domain_features( interpolated_nn_intervals) for f_name in time_domain_feature: feature.append(time_domain_feature[f_name]) # geometrical features geo_feature = hrvanalysis.get_geometrical_features( interpolated_nn_intervals) for f_name in geo_feature: if geo_feature[f_name] is None: feature.append(0) else: feature.append(geo_feature[f_name]) # frequency domain features f_feature = hrvanalysis.get_frequency_domain_features( interpolated_nn_intervals) for f_name in f_feature: if f_feature[f_name] is None: feature.append(0) else: feature.append(f_feature[f_name]) # csi cvi features csi_cvi_feature = hrvanalysis.get_csi_cvi_features( interpolated_nn_intervals) for f_name in csi_cvi_feature: if csi_cvi_feature[f_name] is None: feature.append(0) else: feature.append(csi_cvi_feature[f_name]) # get_poincare_plot_features pp_feature = hrvanalysis.get_poincare_plot_features( interpolated_nn_intervals) for f_name in pp_feature: if pp_feature[f_name] is None: feature.append(0) else: feature.append(pp_feature[f_name]) # wavelet energy coeffs = wavelet_decomposition(filtered) for k, coeff in enumerate(coeffs): b = coeffs[coeff] b = b / np.max(np.abs(b)) feature.append(np.mean(b * b)) # wavelet energy coeffs = wavelet_decomposition(raw_signal) for k, coeff in enumerate(coeffs): b = coeffs[coeff] b = b / np.max(np.abs(b)) feature.append(np.mean(b * b)) # template average templates_ave = np.mean(templates, axis=0) templates_ave = templates_ave / np.max(np.abs(templates_ave)) for p in templates_ave: feature.append(p) # template std ave templates_std = np.std(templates, axis=0) templates_std = templates_std / np.max(np.abs(templates_std)) for p in templates_std: feature.append(p) # wavelet 1 wavelet_1 = coeffs['cA5'] wavelet_1 = wavelet_1[:50] for p in wavelet_1: feature.append(p) return np.array(feature)
def windows(d, w, t):
    r = np.arange(len(d))
    s = r[::t]
    z = list(zip(s, s + w))
    f = '{0[0]}:{0[1]}'.format
    g = lambda t: d.iloc[t[0]:t[1]]
    # return pd.concat(map(g, z), keys=map(f, z))
    return map(g, z)


data = pd.read_csv('C:/Users/mob3f/Documents/ESME/results/Clean_Data/Smartwatch_HeartRate_clean.csv')
data['RR'] = 60000 / data.HR
time_domain_features = get_time_domain_features(data['RR'])

subjects = data.groupby(['DeviceId', 'condition'])
results = []
for name, group in subjects:
    print(name)
    for d in windows(group, 120, 60):
        time_domain_features = get_time_domain_features(d['RR'])
        time_domain_features['condition'] = name[1]
        time_domain_features['participantId'] = name[0]
        results.append(time_domain_features)

clean = pd.DataFrame(results)
clean.to_csv('C:/Users/mob3f/Documents/ESME/results/Clean_Data/Smartwatch_HRV.csv')
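# The sliding windows produced above include truncated tail segments with fewer than `w` rows, and HRV
# statistics on very short windows are unstable. A minimal guard (an assumption, not part of the
# original script) is to skip any window shorter than the requested size before extracting features:
results_guarded = []
for name, group in subjects:
    for d in windows(group, 120, 60):
        if len(d) < 120:
            continue  # skip incomplete trailing windows
        feats = get_time_domain_features(d['RR'])
        feats['condition'] = name[1]
        feats['participantId'] = name[0]
        results_guarded.append(feats)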
def hrvAnalysis(times, samples, rrTimes, rrValues):
    def listSecToMsec(secs):
        msecs = []
        for i in range(len(secs)):
            msecs.append(int(secs[i] * 1000))
        return msecs

    def listMsecToSec(msecs):
        secs = []
        for i in range(len(msecs)):
            secs.append(float(msecs[i]) / 1000)
        return secs

    rrValuesMsec = listSecToMsec(rrValues)

    """
    # This remove outliers from signal
    rr_intervals_without_outliers = remove_outliers(rr_intervals=rrValuesMsec, low_rri=300, high_rri=2000)
    # This replace outliers nan values with linear interpolation
    interpolated_rr_intervals = interpolate_nan_values(rr_intervals=rr_intervals_without_outliers,
                                                       interpolation_method="linear")
    # This remove ectopic beats from signal
    nn_intervals_list = remove_ectopic_beats(rr_intervals=interpolated_rr_intervals, method="malik")
    # This replace ectopic beats nan values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(rr_intervals=nn_intervals_list)
    time_domain_features = get_time_domain_features(interpolated_nn_intervals)
    """

    time_domain_features = get_time_domain_features(rrValuesMsec)
    print("")
    print("TIME DOMAIN FEATURES:")
    for k in time_domain_features.keys():
        v = time_domain_features[k]
        print(" %s : %f" % (k, v))

    freq_domain_features = get_frequency_domain_features(rrValuesMsec)
    print("")
    print("FREQUENCY DOMAIN FEATURES:")
    for k in freq_domain_features.keys():
        v = freq_domain_features[k]
        print(" %s : %f" % (k, v))

    poincare_plot_features = get_poincare_plot_features(rrValuesMsec)
    print("")
    print("POINCARE PLOT FEATURES:")
    for k in poincare_plot_features.keys():
        v = poincare_plot_features[k]
        print(" %s : %f" % (k, v))

    plot_poincare(rrValuesMsec, plot_sd_features=True)

    """
    def rrToTimes(rrList):
        times = []
        prevTime = 0.0
        for i in range(len(rrList)):
            t = prevTime + rrList[i]
            times.append(t)
            prevTime = t
        return times

    timesInterpolatedNNIntervals = rrToTimes(interpolated_nn_intervals)
    """

    """
    plotLeadWithRR("leadII", times, samples,
                   "ECGPU", rrTimes, rrValues,
                   "clean", listMsecToSec(timesInterpolatedNNIntervals), rrValues)
    """

    return True
def print_detection_data(self): """ Method responsible for printing the results. """ print("qrs peaks indices") print(self.qrs_peaks_indices) print("noise peaks indices") print(self.noise_peaks_indices) print("Heart Attack indices") ln = len(self.qrs_peaks_indices) RR_interval = [ self.qrs_peaks_indices[i + 1] - self.qrs_peaks_indices[i] for i in range(ln - 1) ] avrg = np.average(np.array(RR_interval)) HRV_analysis = [] HRV_a = np.sqrt(np.mean(np.square(np.diff(self.qrs_peaks_indices)))) for idx, x in enumerate(RR_interval): if x < (HRV_a * 1.5): HRV_analysis.append(0) else: number_of_missing_peak = int(round(x / HRV_a)) diff = (self.qrs_peaks_indices[idx + 1] - self.qrs_peaks_indices[idx]) # print(diff) for i in range(1, number_of_missing_peak): y = self.qrs_peaks_indices[idx] + int( round(diff * i / number_of_missing_peak)) # print(self.integrated_ecg_measurements[y]) if float(self.integrated_ecg_measurements[y]) <= float( self.find_attackpeaks_limit): self.heart_attack_indices = np.append( self.heart_attack_indices, y) HRV_analysis.append(1) else: self.qrs_peaks_indices = np.append( self.qrs_peaks_indices, y) HRV_analysis.append(0) continue # print(RR_interval) r = np.array(RR_interval) RMSSD = np.sqrt(np.mean(np.square(np.diff(r)))) sdnn = np.std(r) mean_hr = round(60 * 2500 / np.mean(r)) hr = 60 * 1000 / r std_hr = np.std(hr) min_hr = np.min(hr) max_hr = np.max(hr) nnxx = np.sum(np.abs(np.diff(self.qrs_peaks_indices)) > 50) * 1 pnnx = 100 * nnxx / len(self.qrs_peaks_indices) print(self.heart_attack_indices) # print('HRVA : ', HRV_analysis) time_domain_features = get_time_domain_features(RR_interval) print(time_domain_features) # print('RMSSD : ', RMSSD, 'SDNN : ', sdnn, 'Mean_HR : ', mean_hr, "Std_HR : ", std_hr) # print("Min_HR : ", min_hr, 'Max_HR : ', max_hr, "NNXX : ", nnxx, "PNNX : ", pnnx) Rmssd.append(RMSSD) Sdnn.append(sdnn) print('''RMSSD : %s ms Std_HR : %s beats/min Min_HR : %s beats/min Max_HR : %s beats/min NNXX : %s SDNN : %s ms PNNX : %s %% ''' % (RMSSD, std_hr, min_hr, max_hr, nnxx, sdnn, pnnx))
def segment_PPG_SQI_extraction(signal_segment, sampling_rate=100, primary_peakdet=7, secondary_peakdet=6, hp_cutoff_order=(1, 1), lp_cutoff_order=(20, 4), template_type=1): """ Extract all package available SQIs from a single segment of PPG waveform. Return a dataframe with all SQIs and cut points for each segment. Parameters ---------- signal_segment : array-like A segment of raw signal. The length is user defined in compute_SQI() function sampling_rate : int Sampling rate of the signal primary_peakdet : int Selects one of the peakdetectors from the PeakDetector class. The primary one is used to segment the waveform secondary_peakdet : int Selects one of the peakdetectors from the PeakDetector class. The secondary peakdetector is used to compute MSQ SQI hp_cutoff_order : touple (int, int) A high pass filter parameters, cutoff frequency and order Lp_cutoff_order : touple (int, int) A low pass filter parameters, cutoff frequency and order template_type : int Selects which template from the dtw SQI should be used Returns ------- Pandas series object with all SQIs for the given segment """ raw_segment = signal_segment[signal_segment.columns[1]].to_numpy() #Prepare final dictonary that will be converted to dataFrame at the end SQI_dict = { 'first': signal_segment['idx'][0], 'last': signal_segment['idx'][-1] } #Prepare filter and filter signal filt = BandpassFilter(band_type='butter', fs=sampling_rate) filtered_segment = filt.signal_highpass_filter(raw_segment, cutoff=hp_cutoff_order[0], order=hp_cutoff_order[1]) filtered_segment = filt.signal_lowpass_filter(filtered_segment, cutoff=lp_cutoff_order[0], order=lp_cutoff_order[1]) #Prepare primary peak detector and perform peak detection detector = PeakDetector() peak_list, trough_list = detector.ppg_detector(filtered_segment, primary_peakdet) #Helpful lists for iteration variations_stats = ['', '_mean', '_median', '_std'] variations_acf = [ '_peak1', '_peak2', '_peak3', '_value1', '_value2', '_value3' ] stats_functions = [('skewness', sq.standard_sqi.skewness_sqi), ('kurtosis', sq.standard_sqi.kurtosis_sqi), ('entropy', sq.standard_sqi.entropy_sqi)] #Raw signal SQI computation SQI_dict['snr'] = np.mean(sq.standard_sqi.signal_to_noise_sqi(raw_segment)) SQI_dict['perfusion'] = sq.standard_sqi.perfusion_sqi(y=filtered_segment, x=raw_segment) SQI_dict['mean_cross'] = sq.standard_sqi.mean_crossing_rate_sqi( raw_segment) #Filtered signal SQI computation SQI_dict['zero_cross'] = sq.standard_sqi.zero_crossings_rate_sqi( filtered_segment) SQI_dict['msq'] = sq.standard_sqi.msq_sqi(y=filtered_segment, peaks_1=peak_list, peak_detect2=secondary_peakdet) #Per beat SQI calculation dtw_list = sq.standard_sqi.per_beat_sqi(sqi_func=sq.dtw_sqi, troughs=trough_list, signal=filtered_segment, taper=True, template_type=template_type) SQI_dict['dtw_mean'] = np.mean(dtw_list) SQI_dict['dtw_std'] = np.std(dtw_list) correlogram_list = sq.rpeaks_sqi.correlogram_sqi(filtered_segment) for idx, variations in enumerate(variations_acf): try: SQI_dict['correlogram' + variations] = correlogram_list[idx] except Exception as e: return pd.Series(SQI_dict) for funcion in stats_functions: SQI_dict[funcion[0] + variations_stats[0]] = funcion[1](filtered_segment) statSQI_list = sq.standard_sqi.per_beat_sqi(sqi_func=funcion[1], troughs=trough_list, signal=filtered_segment, taper=True) SQI_dict[funcion[0] + variations_stats[1]] = np.mean(statSQI_list) SQI_dict[funcion[0] + variations_stats[2]] = np.median(statSQI_list) SQI_dict[funcion[0] + variations_stats[3]] = np.std(statSQI_list) 
    # ==================================================
    # HRV features
    # ==================================================
    try:
        rr_list = np.diff(peak_list) * (1000 / sampling_rate)  # convert to milliseconds
        nn_list = get_nn_intervals(rr_list)
        nn_list_non_na = np.copy(nn_list)
        nn_list_non_na[np.where(np.isnan(nn_list_non_na))[0]] = -1
        time_domain_features = get_time_domain_features(rr_list)
        frequency_domain_features = get_frequency_domain_features(rr_list)
    except Exception as e:
        return pd.Series(SQI_dict)
    #
    for key in time_domain_features.keys():
        SQI_dict[key] = time_domain_features[key]
    for key in frequency_domain_features.keys():
        SQI_dict[key] = frequency_domain_features[key]
    return pd.Series(SQI_dict)
def process_all_files(self, is_test=False): ''' This function will go through every subject overlapped data and extract the intersect set between hr and acc. the dataset quality control will filter out the RRI dataset with lower bound= 300, upper bound with 1000 the output will be in either test output path or the actual output path. :param is_test: true is for test dataset :return: ''' # load Acc, HR and overlap files if is_test: all_acc_files = [] all_hr_files = [] else: all_acc_files = os.listdir(self.acc_path) all_hr_files = os.listdir(self.hr_path) overlap_df = pd.read_csv( self.overlap_path ) # only do experiment if they have overlapped ECG and Actigraphy total_subjects_list = overlap_df['mesaid'].unique() valid_pids = pd.read_csv( self.cfg.TRAIN_TEST_SPLIT)['uids'].values.tolist() # here we set the valid subject IDs according to a snapshot of MESA data on 2019-05-01. In this # snapshot, we manually checked the aligned data making sure the pre-processing yield satisfied quality of data. # ##### The num of total valid subjects should be 1743 total_subjects_list = list( set(total_subjects_list).intersection(set(valid_pids))) total_processed = [] if not os.path.exists(self.processed_records): with open(self.processed_records, "w") as output: writer = csv.writer(output, lineterminator='\n') writer.writerows(total_processed) # tag = datetime.now().strftime("%Y%m%d-%H%M%S") for PID in total_subjects_list: mesa_id = "%04d" % PID # filter Acc and HR based on the overlap records print('*' * 100) print("Processing subject %s dataset" % mesa_id) acc_inlist_idx = [s for s in all_acc_files if mesa_id in s] hr_inlist_idx = [s for s in all_hr_files if mesa_id in s] feature_list = [] if len(acc_inlist_idx) > 0 and len(hr_inlist_idx) > 0: # get the raw dataset file index acc_file_idx = all_acc_files.index(''.join(acc_inlist_idx)) hr_file_idx = all_hr_files.index(''.join(hr_inlist_idx)) # load Acc and HR into Pandas acc_df = pd.read_csv( os.path.join(self.acc_path, all_acc_files[acc_file_idx])) hr_df = pd.read_csv( os.path.join(self.hr_path, all_hr_files[hr_file_idx])) featnames = get_statistic_feature(acc_df, column_name="activity", windows_size=20) acc_start_idx = overlap_df[overlap_df['mesaid'] == PID]['line'].values[0].astype(int) acc_epochs = hr_df['epoch'].max() # cut the dataset frame from the overlapped start index to the HR end index acc_df = acc_df[acc_start_idx - 1:acc_start_idx + acc_epochs - 1] # recalculate the line to the correct index acc_df['line'] = acc_df['line'] - acc_start_idx + 1 acc_df = acc_df.reset_index(drop=True) # calculate the intersect set between HR and acc and cut HR to align the sequence # ################ Data quality control for Acc ######################## # use marker and activity as the indicator column if the shape is different to 2-dim then drop list_size_chk = np.array(acc_df[['marker', 'activity']].values.tolist()) # check whether the activity is empty if len(list_size_chk.shape) < 2: print( "File {f_name} doesn't meet dimension requirement, it's size is {wrong_dim}" .format(f_name=all_acc_files[acc_file_idx], wrong_dim=list_size_chk.shape)) continue # Cut HRV dataset based on length of Actigraphy dataset if (int(hr_df['epoch'].tail(1)) > acc_df.shape[0]): hr_df = hr_df[hr_df['epoch'] <= acc_df.shape[0]] # remove the noise data points if two peaks overlapped or not wear hr_df = hr_df[hr_df['TPoint'] > 0] # Define RR intervals by taking the difference between each one of the measurements in seconds (*1k) hr_df['RR Intervals'] = hr_df['seconds'].diff() * 1000 
hr_df['RR Intervals'].fillna( hr_df['RR Intervals'].mean(), inplace=True) # fill mean for first sample # old method for processing of RR intervals which is inappropriate # sampling_df = pd.concat([sampling_df, t1], axis =0 ) # outlier_low = np.mean(hr_df['HR']) - 6 * np.std(hr_df['HR']) # outlier_high = np.mean(hr_df['HR']) + 6 * np.std(hr_df['HR']) # hr_df = hr_df[hr_df['HR'] >= outlier_low] # hr_df = hr_df[hr_df['HR'] <= outlier_high] # apply HRV-Analysis package # filter any hear rate over 60000/300 = 200, 60000/2000 = 30 clean_rri = hr_df['RR Intervals'].values clean_rri = hrvana.remove_outliers(rr_intervals=clean_rri, low_rri=300, high_rri=2000) clean_rri = hrvana.interpolate_nan_values( rr_intervals=clean_rri, interpolation_method="linear") clean_rri = hrvana.remove_ectopic_beats(rr_intervals=clean_rri, method="malik") clean_rri = hrvana.interpolate_nan_values( rr_intervals=clean_rri) hr_df["RR Intervals"] = clean_rri # calculate the Heart Rate hr_df['HR'] = np.round((60000.0 / hr_df['RR Intervals']), 0) # filter ACC acc_df = acc_df[acc_df['interval'] != 'EXCLUDED'] # filter RRI t1 = hr_df.epoch.value_counts().reset_index().rename( { 'index': 'epoch_idx', 'epoch': 'count' }, axis=1) invalid_idx = set(t1[t1['count'] < 3]['epoch_idx'].values) del t1 hr_df = hr_df[~hr_df['epoch'].isin(list(invalid_idx))] # get intersect epochs hr_epoch_set = set(hr_df['epoch'].values) acc_epoch_set = set(acc_df['line']) # get acc epochs # only keep intersect dataset diff_epoch_set_a = acc_epoch_set.difference(hr_epoch_set) diff_epoch_set_b = hr_epoch_set.difference(acc_epoch_set) acc_df = acc_df[~acc_df['line'].isin(diff_epoch_set_a)] hr_df = hr_df[~hr_df['epoch'].isin(diff_epoch_set_b)] # check see if their epochs are equal assert acc_df.shape[0] == len(hr_df['epoch'].unique()) # filter out any epochs with rri less than 3 hr_epoch_set = set(hr_df['epoch'].values) hr_epoch_set = hr_epoch_set.difference(invalid_idx) for _, hr_epoch_idx in enumerate(list(hr_epoch_set)): # sliding window gt_label = hr_df[hr_df['epoch'] == hr_epoch_idx]["stage"].values[0] if self.hrv_win != 0: offset = int(np.floor(self.hrv_win / 2)) tmp_hr_df = hr_df[hr_df['epoch'].isin( np.arange(hr_epoch_idx - offset, hr_epoch_idx + offset))] else: tmp_hr_df = hr_df[hr_df['epoch'] == hr_epoch_idx] try: # check to see if the first time stamp is empty start_sec = float(tmp_hr_df['seconds'].head(1) * 1.0) except Exception as ee: print("Exception %s, source dataset: %s" % (ee, tmp_hr_df['seconds'].head(1))) # calculate each epochs' HRV features rr_epoch = tmp_hr_df['RR Intervals'].values all_hr_features = {} try: all_hr_features.update( hrvana.get_time_domain_features(rr_epoch)) except Exception as ee: self.log_process(ee, PID, hr_epoch_idx) print("processed time domain features: {}".format( str(ee))) try: all_hr_features.update( hrvana.get_frequency_domain_features(rr_epoch)) except Exception as ee: self.log_process(ee, PID, hr_epoch_idx) print("processed frequency domain features: {}".format( str(ee))) try: all_hr_features.update( hrvana.get_poincare_plot_features(rr_epoch)) except Exception as ee: self.log_process(ee, PID, hr_epoch_idx) print("processed poincare features: {}".format( str(ee))) try: all_hr_features.update( hrvana.get_csi_cvi_features(rr_epoch)) except Exception as ee: self.log_process(ee, PID, hr_epoch_idx) print("processed csi cvi domain features: {}".format( str(ee))) try: all_hr_features.update( hrvana.get_geometrical_features(rr_epoch)) except Exception as ee: self.log_process(ee, PID, hr_epoch_idx) print("processed 
geometrical features: {}".format( str(ee))) all_hr_features.update({ 'stages': gt_label, 'mesaid': acc_df[acc_df['line'] == hr_epoch_idx]['mesaid'].values[0], 'linetime': acc_df[acc_df['line'] == hr_epoch_idx]['linetime'].values[0], 'line': acc_df[acc_df['line'] == hr_epoch_idx]['line'].values[0], 'wake': acc_df[acc_df['line'] == hr_epoch_idx]['wake'].values[0], 'interval': acc_df[acc_df['line'] == hr_epoch_idx]['interval'].values[0], 'activity': acc_df[acc_df['line'] == hr_epoch_idx] ['activity'].values[0] }) feature_list.append(all_hr_features) # If feature list is not empty if len(feature_list) > 0: hrv_acc_df = pd.DataFrame(feature_list) hrv_acc_df = hrv_acc_df.reset_index(drop=True) del hrv_acc_df['tinn'] # tinn is empty featnames = featnames + ["line"] combined_pd = pd.merge(acc_df[featnames], hrv_acc_df, on='line', how='inner') #combined_pd = combined_pd.reset_index(drop=True) combined_pd['timestamp'] = pd.to_datetime( combined_pd['linetime']) combined_pd['base_time'] = pd.to_datetime('00:00:00') combined_pd['seconds'] = (combined_pd['timestamp'] - combined_pd['base_time']) combined_pd['seconds'] = combined_pd['seconds'].dt.seconds combined_pd.drop(['timestamp', 'base_time'], axis=1, inplace=True) combined_pd['two_stages'] = combined_pd["stages"].apply( lambda x: 1.0 if x >= 1.0 else 0.0) combined_pd.loc[combined_pd['stages'] > 4, 'stages'] = 4 # make sure rem sleep label is 4 combined_pd = combined_pd.fillna(combined_pd.median()) combined_pd = combined_pd[ combined_pd['interval'] != 'EXCLUDED'] aligned_data = self.output_path # standardise and normalise the df feature_list = combined_pd.columns.to_list() std_feature = [ x for x in feature_list if x not in [ 'two_stages', 'seconds', 'interval', 'wake', 'linetime', 'mesaid', 'stages', 'line' ] ] if self.standarize: standardize_df_given_feature(combined_pd, std_feature, df_name='combined_df', simple_method=False) combined_pd.to_csv(os.path.join(aligned_data, (mesa_id + '_combined.csv')), index=False) print("ID: {}, successed process".format(mesa_id)) with open(self.processed_records, "a") as text_file: text_file.write( "ID: {}, successed process \n".format(mesa_id)) total_processed.append( "ID: {}, successed process".format(mesa_id)) else: print("Acc is empty or HRV is empty!") total_processed.append( "ID: {}, failed process".format(mesa_id)) with open(self.processed_records, "a") as text_file: text_file.write("ID: {}, failed process".format(mesa_id))
stdIBI = np.std(IBI_ts)
IBI_cleaned = []
outliers = 0
for element in IBI_ts:
    if element < (meanIBI + (4 * stdIBI)) and (element > meanIBI - (4 * stdIBI)):
        IBI_cleaned.append(element)
    else:
        outliers += 1

percent_removed = (outliers / len(IBI_ts)) * 100
# If we're removing more than 20% of the data then something is wrong and this file should be flagged
if percent_removed < 20:
    time_domain_features = hrv.get_time_domain_features(IBI_cleaned)
    geometrical_features = hrv.get_geometrical_features(IBI_cleaned)
    frequency_domain_features = hrv.get_frequency_domain_features(IBI_cleaned)
    csi_cvi_features = hrv.get_csi_cvi_features(IBI_cleaned)
    poincare_plot_features = hrv.get_poincare_plot_features(IBI_cleaned)
    sampen = hrv.get_sampen(IBI_cleaned)

    td_keys = list(time_domain_features.keys())
    geom_keys = list(geometrical_features.keys())
    frequency_keys = list(frequency_domain_features.keys())
    csi_keys = list(csi_cvi_features.keys())
    poincare_keys = list(poincare_plot_features.keys())
    samp_keys = list(sampen.keys())

    # Format header of output
    header = []
def calculate_bvp_f(bvp_data, sample_rate, bvp_time, bvp_chunks): features_chunks = [] for chunk in range(len(bvp_chunks)): if bvp_chunks[chunk] == None: features_chunks.extend([None]) continue bvpData = list(map(lambda x: x['data'], bvp_chunks[chunk])) chunk_time = bvp_chunks[chunk][-1]['timeStamp'] - bvp_chunks[chunk][0][ 'timeStamp'] chunk_s_r = len(bvpData) / chunk_time if not chunk_s_r + 30 >= sample_rate: features_chunks.extend([None]) continue bandpass = signalsTools.filter_signal(ftype='FIR', sampling_rate=chunk_s_r, band='bandpass', frequency=[0.5, 4], signal=bvpData, order=4) # all_working_data, all_measures = hp.process(bandpass[0], sample_rate=chunk_s_r,calc_freq=True) all_working_data, all_measures = hp.process(np.asarray(bvpData), sample_rate=chunk_s_r) hp.plotter(all_working_data, all_measures) result = biosppy.signals.bvp.bvp(signal=np.asarray(bvpData), sampling_rate=chunk_s_r, show=True) result = fd.welch_psd(nni=np.asarray(bvpData)) # RRI_DF = getRRI(np.asarray(bvpData), column2, sample_rate) # HRV_DF = getHRV(RRI_DF, np.mean(HR)) # print(result['fft_total']) result.fft_plot() f, Pxx_den = signal.welch(np.asarray(bvpData)) plt.semilogy(f, Pxx_den) plt.ylim([0.5e-3, 1]) plt.xlabel('frequency [Hz]') plt.ylabel('PSD [V**2/Hz]') plt.show() plt.plot(all_working_data['RR_list']) plt.show() # features = { # 'HR_avg': all_measures['bpm'], # 'NN_avg': all_measures['ibi'], # 'SDNN': all_measures['sdnn'], # 'SDSD': all_measures['sdsd'], # 'RMSSD': all_measures['rmssd'], # 'pNN20': all_measures['pnn20'], # 'pNN50': all_measures['pnn50'], # 'hrMad': all_measures['hr_mad'], # 'BreR': all_measures['breathingrate'], # 'lf': all_measures['lf'], # 'hf': all_measures['hf'], # 'lf/hf': all_measures['lf/hf'] # } time_domain_features = get_time_domain_features( all_working_data['RR_list']) freq_domain_features = get_frequency_domain_features( all_working_data['RR_list']) sampen_domain_features = get_sampen(all_working_data['RR_list']) features = { 'co_he': freq_domain_features['total_power'] / (freq_domain_features['hf'] + freq_domain_features['lf']) } features.update(time_domain_features) features.update(freq_domain_features) features.update(sampen_domain_features) # features.update({'ApEN':get_apen(all_working_data['RR_list'], 2, (0.2 * features['SDNN']))}) features.update({ 'ApEN': get_apen(all_working_data['RR_list'], 2, (0.2 * features['sdnn'])) }) # samp_enn = sampen2(all_working_data['RR_list']) # features['sampEn'] = samp_enn['sampen'] SD1 = (1 / np.sqrt(2)) * features[ 'sdsd'] # measures the width of poincare cloud https://github.com/pickus91/HRV/blob/master/poincare.py SD2 = np.sqrt( (2 * features['sdnn']**2) - (0.5 * features['sdsd']**2)) # measures the length of the poincare cloud features['SD1'] = SD1 features['SD2'] = SD2 features_chunks.extend([features]) return features_chunks