def read_annotation(annotation_num, sampfrom=0, sampto=RECORD_LENGTH, beat_types=('N', 'V')):
    """Read a record's annotations, keeping only the requested beat types.

    The complete annotation file ``data/<annotation_num>.atr`` is read and
    filtered down to the symbols contained in ``beat_types``. The filtered
    annotations are written to a temporary ``<annotation_num>_select.atr``
    file in the current working directory and read back with ``wfdb.rdann``
    so that the result is an Annotation object limited to the requested
    sample range. The temporary file is always removed afterwards.

    Args:
        annotation_num: Record identifier; converted with ``str()`` to build
            the file names.
        sampfrom: First sample passed to ``wfdb.rdann`` when reading back.
        sampto: Last sample passed to ``wfdb.rdann`` when reading back.
        beat_types: Collection of annotation symbols to keep (e.g. 'V').

    Returns:
        The Annotation object returned by ``wfdb.rdann`` (read with
        ``shift_samps=True``).
    """
    # Read in original file containing all annotations
    annotation = wfdb.rdann('data/' + str(annotation_num), 'atr')

    # Filter sample locations and symbols together so they stay aligned
    selected = [
        (sample, symbol)
        for sample, symbol in zip(annotation.sample, annotation.symbol)
        if symbol in beat_types
    ]
    selected_samples = [sample for sample, _ in selected]
    selected_symbols = [symbol for _, symbol in selected]

    # Write annotation file and read back in to create Annotation object
    temp_record = str(annotation_num) + '_select'
    temp_file = temp_record + '.atr'
    try:
        wfdb.wrann(temp_record, 'atr', np.asarray(selected_samples),
                   selected_symbols, fs=360)
        return wfdb.rdann(temp_record, 'atr', sampfrom=sampfrom,
                          sampto=sampto, shift_samps=True)
    finally:
        # Clean up even when writing or reading fails; the existence check
        # keeps a failed write from being masked by FileNotFoundError.
        if os.path.exists(temp_file):
            os.remove(temp_file)
def create_annotation(samples, symbols, sampfrom=0, sampto=RECORD_LENGTH, beat_types=('V',)):
    """Build an Annotation object from sample locations and symbols.

    Only entries whose symbol is contained in ``beat_types`` are kept. The
    selection is written to a temporary ``temp_delete_me.atr`` file in the
    current working directory and read back with ``wfdb.rdann``; the
    temporary file is always removed afterwards.

    Args:
        samples: Sample locations, aligned with ``symbols``.
        symbols: Annotation symbols, one per entry of ``samples``.
        sampfrom: First sample passed to ``wfdb.rdann`` when reading back.
        sampto: Last sample passed to ``wfdb.rdann`` when reading back.
        beat_types: Collection of annotation symbols to keep.

    Returns:
        The Annotation object returned by ``wfdb.rdann`` (read with
        ``shift_samps=True``).
    """
    # Filter sample locations and symbols together so they stay aligned
    selected = [
        (sample, symbol)
        for sample, symbol in zip(samples, symbols)
        if symbol in beat_types
    ]
    selected_samples = [sample for sample, _ in selected]
    selected_symbols = [symbol for _, symbol in selected]

    # Write annotation file and read back in to create Annotation object
    temp_record = 'temp_delete_me'
    temp_file = temp_record + '.atr'
    try:
        wfdb.wrann(temp_record, 'atr', np.asarray(selected_samples),
                   np.asarray(selected_symbols), fs=360)
        return wfdb.rdann(temp_record, 'atr', sampfrom=sampfrom,
                          sampto=sampto, shift_samps=True)
    finally:
        # Clean up even when writing or reading fails; the existence check
        # keeps a failed write from being masked by FileNotFoundError.
        if os.path.exists(temp_file):
            os.remove(temp_file)
def save_annotations(self):
    """Write an 'atr' annotation file for every test record with detections.

    Records and their detected trigger positions are paired in order. Each
    trigger is stored with the symbol 'N' in ``self.output_dir`` under the
    name produced by ``self._file_name_for``. Records without any trigger
    are skipped.
    """
    for record, peaks in zip(self.test_records, self.detected_triggers):
        peak_count = len(peaks)
        if peak_count == 0:
            # Nothing was detected for this record, so no file is written.
            continue
        wfdb.wrann(
            self._file_name_for(record),
            "atr",
            np.array(peaks),
            ['N'] * peak_count,
            write_dir=self.output_dir,
        )
def remove_non_beat_for_all(self):
    """Rewrite every annotation file in ``sample/`` without non-beat entries.

    For each ``*.atr`` file found in the ``sample`` directory the filtered
    samples and symbols returned by ``self.remove_non_beat`` are written to
    a new annotation file in the current working directory, which is then
    moved into ``annotations/beat``.

    The move is done with ``shutil.move`` instead of a shell ``mv`` command,
    so file names containing spaces or shell metacharacters are handled
    correctly and a failed move raises instead of being silently ignored.
    """
    import shutil  # local import: only needed for the final move

    suffix = ".atr"
    for signal_name in os.listdir("sample"):
        if not signal_name.endswith(suffix):
            continue
        # Strip only the trailing extension to obtain the record name.
        name = signal_name[:-len(suffix)]
        new_sample, new_symbol = self.remove_non_beat("sample/" + name)
        wfdb.wrann(name, "atr", np.asarray(new_sample), np.asarray(new_symbol))
        # Give the full destination path so an existing file from an earlier
        # run is replaced, as `mv` would do.
        shutil.move(signal_name, os.path.join("annotations/beat", signal_name))
def test_save_annotation(self):
    """Write an annotation plus a matching signal record and read it back.

    The annotation is read from the record written by this test. The
    previous version read ``test_record_save_ann_fs`` instead, a file this
    test never writes, so it only passed when another test had already
    created that file.
    """
    record_name = "test_record_save_ann"
    write_dir = "data/ann"
    wfdb.wrann(record_name, 'atr', np.array([1]), np.array(["N"]),
               fs=360, write_dir=write_dir)

    values = [1, 2, 4, 4, 5, 6, 7, 8, 1, 5, 3, 5, 6, 6]
    # One row per sample, one column for the single channel "I".
    p_signal = np.array([[value] for value in values], np.float64)
    wfdb.wrsamp(record_name, 360, ["mV"], ["I"], p_signal,
                comments=None, base_date=None, base_time=None,
                write_dir=write_dir)

    ann = wfdb.rdann(write_dir + "/" + record_name, "atr")
    # The stored annotation must round-trip unchanged.
    assert list(ann.sample) == [1]
    assert list(ann.symbol) == ["N"]
def save_annotations(self):
    """Write an 'atr' annotation file for every test record with detections.

    The file name is built from ``self.id``, the ``repr`` of
    ``self.detector`` and the record's ``record_name``, joined by
    underscores. Every trigger is stored with the symbol 'N' in
    ``self.output_dir``. Records without any trigger are skipped.
    """
    for record, peaks in zip(self.test_records, self.detected_triggers):
        peak_count = len(peaks)
        if peak_count == 0:
            # Nothing was detected for this record, so no file is written.
            continue
        filename = f"{self.id}_{self.detector!r}_{record.record_name}"
        wfdb.wrann(
            filename,
            "atr",
            np.array(peaks),
            ['N'] * peak_count,
            write_dir=self.output_dir,
        )
def test_pure_prediction(self):
    """Run GQRS detection on record "100" and store the result.

    Channel 0 of the record located at ``self.mitdb + "100"`` is read, the
    detections are written as 'N' annotations to ``data/100.atr``, printed,
    and checked to be not None.
    """
    signal, fields = wfdb.rdsamp(self.mitdb + "100", channels=[0])
    detections = processing.gqrs_detect(signal, fs=fields['fs'])
    labels = ['N'] * len(detections)
    wfdb.wrann("100", 'atr', detections, write_dir="data", symbol=labels)
    print(detections)
    self.assertIsNotNone(detections)
def write_to(self, record_name: str, dir_path: str, *, fmt: str = "32") -> None:
    """
    Write the signal data and all annotation sets as a wfdb record.

    The signal is written with ``wfdb.wrsamp``; afterwards one annotation
    file per key of ``self.annotations`` is written with ``wfdb.wrann``,
    using the key as the annotation file extension.

    Params:
        record_name: name of record (i.e. of header and data file)
        dir_path: name of target directory
        fmt: physionet data format for digital signals (verified formats: 16, 32)
    """
    import wfdb

    # The same storage format is used for every signal column.
    per_channel_fmt = [fmt] * self.data.shape[1]
    wfdb.wrsamp(
        record_name,
        fs=self.fs,
        units=self.units,
        p_signal=self.data,
        sig_name=self.channels,
        write_dir=dir_path,
        fmt=per_channel_fmt,
    )

    for annotator, entry in self.annotations.items():
        sample = entry["sample"]
        # A single symbol per annotator is repeated for each sample.
        symbols = [entry["symbol"]] * len(sample)
        wfdb.wrann(record_name, annotator, sample, symbol=symbols,
                   write_dir=dir_path)
def save_typed_datasets(self, symbol, datasets):
    """Write each dataset of ``symbol`` as a wfdb record with annotations.

    For every entry of ``datasets`` the samples are reshaped to a single
    column, scaled by their largest absolute value (when non-zero), cleaned
    of NaN/inf values and written with ``wfdb.wrsamp`` to
    ``<data_folder_path>/<symbol>``. The matching annotations are written
    with ``wfdb.wrann`` to ``ann_data_path`` and the record name is appended
    to the ``RECORDS`` file of the symbol's data folder.

    A ``ValueError`` while writing one record is reported on stdout; the
    partial output is removed through ``self.clean_up_wrong_writes`` and the
    counters in ``self.collected_data`` and ``self.failed_writes`` are
    adjusted. Processing then continues with the next dataset.

    Finally ``self.last_written_idx[symbol]`` is advanced by
    ``len(datasets)``.

    Args:
        symbol: Key into ``self.test_annotations`` / ``self.test_fields``.
            Its first character is used as the annotation symbol and the
            part after the last underscore must parse as an int.
        datasets: Sequence of sample collections, aligned by index with the
            annotations and fields stored for ``symbol``.
    """
    # Register the counter before the loop so that an empty `datasets`
    # does not raise KeyError on the final increment.
    self.last_written_idx.setdefault(symbol, 0)

    for idx, samples in enumerate(datasets):
        anno = self.test_annotations[symbol][idx]
        meta = self.test_fields[symbol][idx]
        corrected_idx = self.last_written_idx[symbol] + idx
        record_name = "{}_{}".format(symbol.replace(".", "-"), corrected_idx)
        try:
            reshaped_data = np.reshape(samples, (-1, 1)).astype(np.float64)
            max_min = max(np.max(reshaped_data), abs(np.min(reshaped_data)))
            if max_min != 0:
                reshaped_data = reshaped_data / max_min
            # nan_to_num returns a new array; keep the result, otherwise
            # the call has no effect.
            # NOTE(review): a NaN in the input makes max_min NaN, so the
            # whole record becomes NaN and is zeroed here - confirm that
            # this is acceptable.
            reshaped_data = np.nan_to_num(reshaped_data)
            sig_names = [sn.replace(" ", "") for sn in meta['sig_name']]
            wfdb.wrsamp(record_name, self.__target_frequency__,
                        meta['units'], sig_names,
                        p_signal=reshaped_data, fmt=["32"],
                        comments=meta['comments'],
                        base_date=meta['base_date'],
                        base_time=meta['base_time'],
                        write_dir=os.path.join(self.data_folder_path, symbol))
            wfdb.wrann(record_name, 'atr', np.array(anno),
                       np.array([symbol[0]] * len(anno)),
                       fs=self.__target_frequency__,
                       write_dir=os.path.join(self.ann_data_path))
            records_path = os.path.join(self.data_folder_path, symbol, "RECORDS")
            with open(records_path, 'a') as records_file:
                records_file.writelines([record_name + "\n"])
        except ValueError as v:
            print("Failed to write", record_name, "because", str(v))
            self.clean_up_wrong_writes(symbol, record_name)
            print(symbol[0], int(symbol.split("_")[-1]))
            self.collected_data[symbol[0]][int(symbol.split("_")[-1])] -= 1
            self.failed_writes.setdefault(symbol, 0)
            self.failed_writes[symbol] += 1

    self.last_written_idx[symbol] += len(datasets)
def clean_signal(self, sample_name):
    """Re-write the beat annotations of one record after cleaning them.

    The first channel of ``sample/<sample_name>`` and the annotations in
    ``annotations/beat/<sample_name>.atr`` are passed to
    ``self.update_annotations``. The returned samples and symbols are
    written to ``<sample_name>.atr`` in the current working directory and
    the file is then moved into ``annotations/cleaned``.

    The move is done with ``shutil.move`` instead of a shell ``mv`` command,
    so names containing spaces or shell metacharacters are handled correctly
    and a failed move raises instead of being silently ignored.

    Args:
        sample_name: Record name without extension.
    """
    import shutil  # local import: only needed for the final move

    print(sample_name)
    record = wfdb.rdrecord("sample/" + sample_name)
    # Only the first signal column is used.
    channel = [row[0] for row in record.p_signal]

    annotation = wfdb.rdann("annotations/beat/" + sample_name, "atr")
    new_sample, new_symbol = self.update_annotations(
        channel, annotation.sample, annotation.symbol)

    wfdb.wrann(sample_name, "atr", np.asarray(new_sample),
               np.asarray(new_symbol))

    file_name = sample_name + ".atr"
    # Give the full destination path so an existing file from an earlier
    # run is replaced, as `mv` would do.
    shutil.move(file_name, os.path.join("annotations/cleaned", file_name))
def test_ecg_detectors_package(self):
    """Run the Pan-Tompkins detector on record numbers 100 to 299.

    For each number, channel 0 of the record at ``self.mitdb + <number>`` is
    read and the detected peaks are written as 'N' annotations to
    ``data/ann``. Any exception for a record is printed together with the
    record number and the loop continues with the next one.
    """
    for record_id in range(100, 300):
        try:
            sig, fields = wfdb.rdsamp(self.mitdb + str(record_id), channels=[0])
            r_peaks = Detectors(fields['fs']).pan_tompkins_detector(sig[:, 0])
            wfdb.wrann(
                str(record_id),
                'atr',
                sample=np.array(r_peaks),
                write_dir="data/ann",
                symbol=(['N'] * len(r_peaks)),
            )
            print(record_id)
        except Exception as e:
            print(record_id, e)
def remove_non_beat_for_all(self, database):
    """Write beat-only annotation files for every record of ``database``.

    Every ``*.atr`` file in ``database/<database>/original_annotations`` is
    passed, together with the list of non-beat symbols, to
    ``self.remove_non_beat``; the result is written as ``<name>.atr`` in the
    current working directory. A fixed set of record names is skipped.

    Args:
        database: Name of the sub-directory below ``database/``.
    """
    NON_BEAT_ANN = [
        'x', '(', ')', 'p', 't', 'u', '`', '\'', '^', '|', '~', '+', 's',
        'T', '*', 'D', '=', '"', '@', '[', ']'
    ]
    # Records that are never processed.
    excluded = ('I04', 'I17', 'I35', 'I44', 'I57', 'I72', 'I74')

    for signal_name in os.listdir('database/' + database + '/original_annotations'):
        if not signal_name.endswith(".atr"):
            continue
        name = signal_name.replace(".atr", "")
        if name in excluded:
            continue
        print(name)
        beat_ann, beat_symbol = self.remove_non_beat(
            'database/' + database + '/original_annotations/' + name,
            NON_BEAT_ANN)
        wfdb.wrann(name, 'atr', sample=np.asarray(beat_ann),
                   symbol=np.asarray(beat_symbol))
def CPSC2MIT():
    """Convert 2000 CPSC2019 ``.mat`` recordings to wfdb records.

    For every index from 00001 to 02000 the 'ecg' array of the data file and
    the 'R_peak' array of the reference file are loaded. The signal is
    written as record ``CPSC<index>`` (500 Hz, unit 'mV', signal name 'I',
    format '212') and the first column of the reference array is written as
    'N' annotations with extension 'atr'. Output goes to the current working
    directory.
    """
    data_path = 'mit-bih-arrhythmia-database-1.0.0/CPSC2019/data/'
    ref_path = 'mit-bih-arrhythmia-database-1.0.0/CPSC2019/ref/'

    for round_no in range(2000):
        print("processing round:", round_no)
        index = "%05d" % (round_no + 1)

        ecg_data = scio.loadmat(data_path + 'data_' + index + '.mat')['ecg']
        ecg_ref_2d = scio.loadmat(ref_path + 'R_' + index + '.mat')['R_peak']

        # Take the first column of the reference array; every entry gets
        # the symbol 'N'.
        ecg_ref = np.array([row[0] for row in ecg_ref_2d])
        symbols = ['N'] * len(ecg_ref_2d)

        record_name = 'CPSC' + index
        wfdb.wrsamp(record_name, fs=500, units=['mV'], sig_name=['I'],
                    p_signal=ecg_data, fmt=['212'])
        wfdb.wrann(record_name, 'atr', ecg_ref, symbol=symbols)
def aip_detector(file_dir, channel_num, pattern_width, pattern_time_width, normalize, initial_threshold, graphics, create_annotation):
    """Detect peaks in one ECG channel with a Gaussian-derivative pattern.

    The selected channel is filtered with the pattern ``p = dg * g`` (``g``
    a Gaussian window, ``dg`` its first difference), rectified and smoothed
    with a moving-average kernel. Peaks of the resulting detection signal
    are found with ``find_peaks`` using a height threshold that starts at a
    percentile of the detection signal and is then refined from a histogram
    of the peak heights.

    Args:
        file_dir: Record path passed to ``wfdb.rdsamp`` (and to
            ``wfdb.rdann`` when ``create_annotation`` is True).
        channel_num: Index of the channel to read.
        pattern_width: Number of points of the Gaussian window.
        pattern_time_width: Multiplied by the record's ``fs`` to size the
            smoothing kernel (presumably seconds - TODO confirm).
        normalize: When True, the pattern is divided by its largest
            absolute value.
        initial_threshold: Percentile of the detection signal used as the
            initial peak-height threshold.
        graphics: When True, intermediate and final results are plotted.
        create_annotation: When True, the detected peaks are written with
            ``wfdb.wrann``.

    Returns:
        None. The function has no return statement; results are only
        plotted and/or written to disk.
    """
    # Load the ECG
    da, info = wfdb.rdsamp(file_dir, sampfrom=0, channels=[channel_num])
    data = pd.DataFrame({'hart': da[:, 0]})
    # N is the pattern width
    N = pattern_width
    # g(n) : Gaussian function
    # NOTE(review): `sigma` is never used; the same expression is repeated
    # in the call below.
    sigma = (N - 1) / 5
    g = scipy.signal.gaussian(N, ((N - 1) / 5))
    # Pattern: p = dg * g
    dg = np.diff(g)
    g = g[1:]  # Drop the first element so g has the same length as dg
    p = dg * g
    # Optionally scale the pattern to a maximum absolute value of 1
    if (normalize == True):
        maxim = np.max(np.abs(p))
        p = p / maxim
    # Filtering: filter the reversed signal with p, then reverse the result
    # and filter it again
    s = np.array(data['hart'])
    rise_det = lfilter(p, 1, np.flip(s))
    rise_det = lfilter(p, 1, np.flip(rise_det))
    # Low pass filter: moving average applied to the rectified signal with
    # the same reverse/filter/reverse/filter sequence
    pattern_size = 2 * np.round(pattern_time_width / 2 * info['fs']) + 1
    lp_size = np.round(1.2 * pattern_size)
    vector = np.ones(int(lp_size))
    vector = vector / lp_size
    rise_det = lfilter(vector, 1, np.flip(np.abs(rise_det)))
    rise_det = lfilter(vector, 1, np.flip(rise_det))
    # Initial threshold: peaks whose height is at least the
    # `initial_threshold`-th percentile of the detection signal
    initial_thr = initial_threshold
    actual_thr = np.percentile(rise_det, initial_thr)
    peaks_loc, _ = find_peaks(rise_det, height=actual_thr)
    if (graphics == True):
        plt.figure()
        plt.title("Results")
        plt.plot(rise_det, alpha=0.5, color='green')
        plt.plot(s, alpha=0.5, color='black')
        plt.scatter(peaks_loc, rise_det[peaks_loc], alpha=0.9, color='blue', marker="X")
        plt.show()
    # First statistical threshold: histogram of the peak heights on a grid
    # whose step is the median spacing of the 1st..99th percentiles
    q = list(range(1, 100))
    prctile_grid = np.percentile(rise_det[peaks_loc], q)
    grid_step = np.median(np.diff(prctile_grid))
    max_values = np.max(rise_det[peaks_loc])
    thr_grid = np.arange(actual_thr, max_values, grid_step)
    hist_max_values, a = np.histogram(rise_det[peaks_loc], thr_grid)
    if (graphics == True):
        plt.figure()
        plt.hist(rise_det[peaks_loc], bins=len(thr_grid))
        plt.show()
    # Last statistical threshold
    first_bin_idx = 1
    thr_idx = find_peaks(hist_max_values)
    thr_max = hist_max_values[thr_idx[0]]
    # Count-weighted mean index of the histogram's local maxima, floored
    thr_idx_expected = np.floor( np.dot(thr_idx[0], thr_max) * (1 / np.sum(thr_max)))
    aux_seq = np.array(range(1, len(thr_grid)))
    hist_max_values = hist_max_values[aux_seq < thr_idx_expected]
    min_hist_max_values = np.min(hist_max_values)
    aux_seq = aux_seq[aux_seq < thr_idx_expected]
    aux_seq = aux_seq[aux_seq >= first_bin_idx]
    # NOTE(review): `&` binds tighter than `==`, so this evaluates
    # (aux_seq & hist_max_values) == min_hist_max_values - confirm that
    # this is the intended condition.
    thr_min_idx = np.round( np.mean(np.nonzero(aux_seq & hist_max_values == min_hist_max_values)))
    actual_thr = thr_grid[int(thr_min_idx)]
    thr_grid = np.arange(actual_thr, max_values, grid_step)
    # Here we plot the thresholds on the histogram
    if (graphics == True):
        plt.axvline(x=actual_thr, color='red', label='Threshold_2')
        plt.axvline(x=thr_grid[int(thr_idx_expected)], color='orange', label='Threshold_1')
        plt.legend(framealpha=1, frameon=True)
        plt.figure()
        plt.hist(rise_det[peaks_loc], bins=thr_grid)
        plt.show()
    # Final results: peaks above the refined threshold
    peaks_loc, _ = find_peaks(rise_det, height=actual_thr)
    if (graphics == True):
        plt.figure()
        plt.title("Results")
        plt.plot(rise_det, alpha=0.5, color='green')
        plt.plot(s, alpha=0.5, color='black')
        plt.scatter(peaks_loc, rise_det[peaks_loc], alpha=0.9, color='blue', marker="X")
        plt.show()
    if (create_annotation == True):
        # The record's existing annotation object is loaded and its
        # anntype/annsamp attributes are overwritten with the detections.
        annotation = wfdb.rdann(file_dir, 'atr')
        annotation.anntype = np.full(len(peaks_loc), 'N')
        annotation.annsamp = peaks_loc
        # NOTE(review): the file is written with extension 'hea' into the
        # working directory but read back below with extension 'atr' from a
        # hard-coded absolute path - confirm this is intended.
        wfdb.wrann('aip_annotations', 'hea', annotation.annsamp, annotation.anntype)
        # NOTE(review): `ann` is not used afterwards.
        ann = wfdb.rdann( '/home/jorge/Escritorio/Isquemia/Proyectos en Python/aip_detector/aip_annotations', 'atr')
def save_prediction(r_peaks, record, save_path):
    """Store detected R-peaks as 'N' annotations in an 'atr' file.

    Nothing is written when ``r_peaks`` is empty.

    Args:
        r_peaks: Sample positions of the detected peaks.
        record: Record name used for the annotation file.
        save_path: Directory the annotation file is written to.
    """
    peak_count = len(r_peaks)
    if peak_count <= 0:
        return
    wfdb.wrann(
        record,
        'atr',
        sample=np.array(r_peaks),
        write_dir=save_path,
        symbol=(['N'] * peak_count),
    )
import qrs_detection
import wfdb
import numpy as np
from os import listdir
from os.path import isfile, join

if __name__ == '__main__':
    # Run the QRS detector on the first channel of every record found in
    # the database folder and store the detections next to the records as
    # 'pred' annotation files.
    root = '../data/mit-bih-arrhythmia-database-1.0.0'

    # A record id is the first three characters of a file name; the set
    # removes the duplicates caused by several files per record.
    record_ids = sorted({
        entry[0:3] for entry in listdir(root) if isfile(join(root, entry))
    })

    for record_id in record_ids:
        print(record_id)
        record = wfdb.rdrecord(join(root, record_id))
        first_channel = record.p_signal[:, 0]
        final_peaks = qrs_detection.detect_qrs(
            first_channel, cutoff_low=15, cutoff_high=5, fs=record.fs, order=3)
        wfdb.wrann(record_id, 'pred', np.array(final_peaks),
                   symbol=['N'] * len(final_peaks), write_dir=root)
# Machine learning model (SVM): fit on the training split, then report the
# classification metrics and the ROC AUC on the validation split.
clf_rbf.fit(x_train, y_train)
pred_valid = clf_rbf.predict(x_valid)
print(classification_report(y_valid, pred_valid))
fpr, tpr, thresholds = metrics.roc_curve(y_valid, pred_valid)
print(metrics.auc(fpr, tpr))

# 3. The assigned 'V' beat info shall be exported to WFDB format (*.test),
# and sent back to Biofourmis.
for index in range(1, 3):
    name = "b" + str(index)
    path = "C:\\E\\Jobs\\Biofourmis\\ECG\\database\\test\\b" + str(index)

    # Build the feature matrix for the test file.
    x_test, location = extract_data_from_test_file(path)
    n = len(x_test)
    x_test = np.array(x_test)
    testingdata1 = simple_f(x_test, n)
    testingdata2 = wavelets_f(x_test)
    x_feature_test = np.hstack((testingdata1, testingdata2))
    predicted_labels = clf_rbf.predict(x_feature_test)

    # Mark every beat predicted as class 1 with the symbol "V".
    ecg_sig, ecg_type, ecg_peak = read_ecg(path)
    for i, beat_location in enumerate(location):
        if predicted_labels[i] == 1:
            ecg_type[beat_location] = "V"

    wfdb.wrann(name, 'test', ecg_peak, ecg_type,
               write_dir='C:\\E\\Jobs\\Biofourmis\\ECG\\database\\test\\')
import wfdb
from ecgdetectors import Detectors
import numpy as np

DATA_PATH = "/data/"
SAVE_PATH = "/pred/"

# The RECORDS file lists one record name per line.
with open(DATA_PATH + "RECORDS", 'r') as f:
    records = [line.strip("\n") for line in f.readlines()]

# Run the Pan-Tompkins detector on channel 0 of each record and store the
# detections as 'N' annotations in SAVE_PATH.
for record in records:
    sig, fields = wfdb.rdsamp(DATA_PATH + record, channels=[0])
    r_peaks = Detectors(fields['fs']).pan_tompkins_detector(sig[:, 0])
    if len(r_peaks) <= 0:
        # No detections: no annotation file is written for this record.
        continue
    wfdb.wrann(record, 'atr', sample=np.array(r_peaks), write_dir=SAVE_PATH,
               symbol=(['N'] * len(r_peaks)))
def UCDDB_ResampleAnnotations(path_source, path_target, source_file_portfix='', target_file_postfix='', preserve_input_size=False, ignore_first_timeframe_during_overlap=True, create_annotationfiles_as_ascii=False, print_log=True):
    """Resample the UCDDB respiratory-event annotations to one label per minute.

    For every data set in the hard-coded list the 1 Hz annotation types are
    loaded, reduced to a boolean "is 'APNEA-O'" list, resampled to 1/60 Hz
    with ``ResampleAnnotations``, converted to symbols with
    ``OneOrZeroToAorN`` and written as a wfdb annotation file with extension
    'apn' into ``path_target``. One row of statistics per data set is
    appended to ``Resampling_AnnotationInfo.txt`` in ``path_target``.

    Args:
        path_source: Directory with the records and ``*_respevt.txt`` files;
            joined to file names with a backslash.
        path_target: Directory for the annotation files and the info file.
        source_file_portfix: Suffix appended to each data set name when
            building the source file names.
        target_file_postfix: Suffix appended to the data set name to form
            the annotation record name.
        preserve_input_size: Forwarded to ``ResampleAnnotations``.
        ignore_first_timeframe_during_overlap: Forwarded to
            ``ResampleAnnotations``.
        create_annotationfiles_as_ascii: Not used by this function.
        print_log: When True, the sample indices and symbols are printed.

    Returns:
        None.
    """
    # NAME and EXPERIMENT START are obtained manually from 'SubjectDetails.xls' and are needed for the resampling
    datasets = [
        ['ucddb002', '00:11:04'],
        ['ucddb003', '23:07:50'],
        ['ucddb005', '23:28:42'],
        ['ucddb006', '23:57:14'],
        ['ucddb007', '23:30:22'],
        ['ucddb008', '23:29:11'],
        ['ucddb009', '22:35:22'],
        ['ucddb010', '22:51:18'],
        ['ucddb011', '22:47:38'],
        ['ucddb012', '23:23:21'],
        ['ucddb013', '23:44:00'],
        ['ucddb014', '23:37:59'],
        ['ucddb015', '23:02:45'],
        ['ucddb017', '23:16:05'],
        ['ucddb018', '23:49:02'],
        ['ucddb019', '23:30:33'],
        ['ucddb020', '23:48:21'],
        ['ucddb021', '22:52:05'],
        ['ucddb022', '23:35:05'],
        ['ucddb023', '22:55:51'],
        ['ucddb024', '22:58:02'],
        ['ucddb025', '00:25:37'],
        ['ucddb026', '22:58:13'],
        ['ucddb027', '22:56:30'],
        ['ucddb028', '00:29:08'],
    ]
    # loop through all data sets
    for dataset_i in range(len(datasets)):
        # -----------------------------------------------------------------------
        # Init
        # -----------------------------------------------------------------------
        dataset_name = datasets[dataset_i][
            0] + source_file_portfix  # e.g. ucddb002
        experiment_start_time = datasets[dataset_i][
            1]  # time of day as string, e.g. 23:45:34
        print('Starting to process: ', dataset_name)
        # e.g. MyPath\\ucddb002_respevt.txt
        annotation_source_path = path_source + '\\' + dataset_name + '_respevt.txt'
        # e.g. MyPath\\ucddb002
        record_source_path = path_source + '\\' + dataset_name
        # Record name only (e.g. ucddb002_resampledAnn); the target
        # directory is passed to wrann separately via write_dir.
        annotation_file_name = dataset_name + target_file_postfix
        # e.g. MyPath\\MySubFolder\\Resampling_AnnotationInfo.txt
        # (used to store information about the resamplings)
        target_path_resampling_info = path_target + '\\' + 'Resampling_AnnotationInfo.txt'
        # Get the frequency and duration of the record. This may not be the best way just to get those values
        # because the signals are loaded too (unnecessary overhead), but this database is not too big...
        # `signals` itself is not used afterwards.
        signals, fields = wfdb.rdsamp(record_source_path)
        record_length = fields['sig_len']
        record_frequency = fields['fs']
        # calculate total duration in seconds
        record_duration_in_seconds = record_length / record_frequency
        # convert string to date time
        start_time = datetime.strptime(experiment_start_time, '%H:%M:%S')
        # get annotation types (e.g. none, HYP-C, HYP-O, APNEA-O, etc.) for the complete record in 1Hz
        # (one annotation per second)
        annotations_std_types = UCDDB_LoadAnnonationsTXTFileStandardized(
            annotation_source_path,
            start_time=start_time,
            duration_in_seconds=record_duration_in_seconds)
        # Convert the list of annotation types to a list of booleans.
        # Only 'APNEA-O' entries become True; the commented-out alternative
        # below marked every type other than 'none' instead.
        #annotations_std_binary = [not (type == 'none') for type in annotations_std_types]
        annotations_std_binary = [(type == 'APNEA-O')
                                  for type in annotations_std_types]
        # ---------------------------------------------------------------
        # Resampling
        # ---------------------------------------------------------------
        # resample annotations from 1 Hz to 1/60 Hz (one value per minute)
        resampled_ann = ResampleAnnotations(
            annotations=annotations_std_binary,
            source_sample_frequency=1,
            target_sample_frequency=(1 / 60),
            preserve_input_size=preserve_input_size,
            ignore_first_timeframe_during_overlap=
            ignore_first_timeframe_during_overlap,
            ignore_short_apnea_in_timeframe=False)
        # convert resampled annotations (1 or 0) to symbols ('A' or 'N')
        symbol_resampled_ann = [
            OneOrZeroToAorN(sample) for sample in resampled_ann
        ]
        # Generate sample indices to map the annotations to the record
        # (e.g. if 100Hz, the indices are [0, 6000, 12000, 18000, ...])
        symbol_resampled_ann_sampleIndices = [
            i * 60 * record_frequency
            for i in range(len(symbol_resampled_ann))
        ]
        # ---------------------------------------------------------------------------------
        # Write annotation files
        # ---------------------------------------------------------------------------------
        print('Write annotation file into ', path_target)
        if print_log: print('Sample indices', symbol_resampled_ann_sampleIndices)
        if print_log: print('Apnea symbols', symbol_resampled_ann)
        wfdb.wrann(record_name=annotation_file_name,
                   extension='apn',
                   sample=np.array(symbol_resampled_ann_sampleIndices),
                   symbol=np.array(symbol_resampled_ann),
                   write_dir=path_target)
        # ---------------------------------------------------------------------------------
        # Write log file with additional information
        # ---------------------------------------------------------------------------------
        # The list holds booleans; count(1) / count(0) count the True /
        # False entries because True == 1 and False == 0.
        count_apnea = annotations_std_binary.count(1)
        count_no_apnea = annotations_std_binary.count(0)
        percentage_apnea = count_apnea / len(annotations_std_binary) * 100
        percentage_no_apnea = count_no_apnea / len(
            annotations_std_binary) * 100
        # The file is opened in append mode, so rows from earlier runs are kept.
        with open(target_path_resampling_info, mode='a+',
                  newline='') as csv_file:
            fieldnames = \
                ['RecordName', 'Frequency', 'NumberSamples', 'TotalDuration[h]', 'Apnea[%]', 'NoApnea[%]' ]
            writer = csv.DictWriter(csv_file,
                                    fieldnames=fieldnames,
                                    delimiter='\t')
            # Print header only in first iteration
            if dataset_i == 0:
                writer.writeheader()
            writer.writerow({
                'RecordName': dataset_name,
                'Frequency': record_frequency,
                'NumberSamples': record_length,
                'TotalDuration[h]': round(record_duration_in_seconds / 60 / 60, 2),
                'Apnea[%]': round(percentage_apnea, 2),
                'NoApnea[%]': round(percentage_no_apnea, 2)
            })
        print()  # new line
    #TODO
    #createScalograms()
    return
import wfdb
from wfdb import processing
import numpy as np

DATA_PATH = "/data/"
SAVE_PATH = "/pred/"

# The RECORDS file lists one record name per line.
with open(DATA_PATH + "RECORDS", 'r') as f:
    records = [line.strip("\n") for line in f.readlines()]

# Run GQRS detection on channel 0 of each record and store the detections
# as 'N' annotations in SAVE_PATH.
for record in records:
    sig, fields = wfdb.rdsamp(DATA_PATH + record, channels=[0])
    res = processing.gqrs_detect(sig[:, 0], fs=fields['fs'])
    if len(res) <= 0:
        # No detections: no annotation file is written for this record.
        continue
    wfdb.wrann(record, 'atr', res, write_dir=SAVE_PATH,
               symbol=(['N'] * len(res)))
def test_fs_is_kept_when_saving(self):
    """The sampling frequency given to wrann is returned by rdann."""
    expected_fs = 360
    wfdb.wrann(
        "test_record_save_ann_fs",
        'atr',
        np.array([1]),
        np.array(["N"]),
        fs=expected_fs,
        write_dir="data/ann",
    )
    stored = wfdb.rdann("data/ann/test_record_save_ann_fs", "atr")
    self.assertEqual(expected_fs, stored.fs)
def UCDDB_Functions():
    """Demonstrate the UCDDB annotation helpers on record ucddb002.

    Loads the raw and the standardized annotations, resamples the boolean
    "any event" list to one value per minute in three configurations, plots
    the first result, writes it as an 'apn' annotation file and stores a
    per-second overview in a tab-separated text file.
    """
    path = 'datasets\\db3_ucddb\\ucddb002_respevt.txt'
    apnea_signals = ap.UCDDB_LoadAnnonationsTXTFileRaw(path)
    print(apnea_signals)

    start_time = datetime.strptime('00:11:04', '%H:%M:%S')
    annotations_std = ap.UCDDB_LoadAnnonationsTXTFileStandardized(
        path, start_time=start_time, duration_in_seconds=7.65*60*60)
    print(annotations_std)

    # True for every entry whose type is not 'none'.
    annotations_std_binary = [not (kind == 'none') for kind in annotations_std]

    def resample(keep_size, skip_first_overlap):
        # All three variants share every argument except these two flags.
        return ap.ResampleAnnotations(
            annotations=annotations_std_binary,
            source_sample_frequency=1,
            target_sample_frequency=(1 / 60),
            preserve_input_size=keep_size,
            ignore_first_timeframe_during_overlap=skip_first_overlap,
            ignore_short_apnea_in_timeframe=False)

    resampled = resample(False, False)
    resampled_full_size = resample(True, False)
    resampled_full_size_IgnoreFirstOverlap = resample(True, True)

    plt.plot(resampled)

    # 'A' where the resampled value equals 1, 'N' otherwise.
    apn_symbols = ['A' if element == 1 else 'N' for element in resampled]
    # One annotation every 60 * 128 samples.
    resampled_1Min = [minute * 60 * 128 for minute in range(len(apn_symbols))]

    print('Write annotation file')
    print(resampled_1Min)
    print(apn_symbols)
    wfdb.wrann('ucddb002', 'apn', np.array(resampled_1Min), np.array(apn_symbols))

    with open('datasets\\db3_ucddb\\AnnotationsResampled\\ucddb002_AnnotationsInfo.txt',
              mode='w', newline='') as csv_file:
        fieldnames = [
            'Sample',
            'DateTime',
            'Apnea yes/no',
            'Apnea Type',
            'Apnea yes/no Resampled',
            'Apnea yes/no Resampled Ignore First Overlap',
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        # One row per second of the standardized annotations.
        for second, apnea_type in enumerate(annotations_std):
            row = {
                'Sample': second,
                'DateTime': (start_time + timedelta(seconds=second)),
                'Apnea yes/no': annotations_std_binary[second],
                'Apnea Type': apnea_type,
                'Apnea yes/no Resampled': resampled_full_size[second],
                'Apnea yes/no Resampled Ignore First Overlap':
                    resampled_full_size_IgnoreFirstOverlap[second],
            }
            writer.writerow(row)