def test_4c(self):
    """
    Format 12, multiple samples per frame, skew, selected duration,
    selected channels, physical.

    Target file created with:
        rdsamp -r sample-data/03700181 -f 8 -t 128 -s 0 2 -P | cut -f 2- > record-4c
    """
    def read_window(record_name, **extra):
        # Every read in this test uses the same channels and sample range
        return wfdb.rdsamp(record_name, channels=[0, 2], sampfrom=1000,
                           sampto=16000, **extra)

    local_sig, local_fields = read_window('sample-data/03700181')
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/target-output/record-4c')

    # Same window, streamed from physiobank
    remote_sig, remote_fields = read_window('03700181', pb_dir='mimicdb/037')

    # File writing with multiple samples per frame and skew: the whole
    # record has to be read with the skew ignored before it is written
    unskewed = wfdb.rdrecord('sample-data/03700181', physical=False,
                             smooth_frames=False, ignore_skew=True)
    unskewed.wrsamp(expanded=True)

    # Read back the record that was just written
    written_sig, written_fields = read_window('03700181')

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
    assert np.array_equal(local_sig, written_sig)
    assert local_fields == written_fields
def test_1b(self):
    """
    Format 16, byte offset, selected duration, selected channels,
    physical.

    Target file created with:
        rdsamp -r sample-data/a103l -f 50 -t 160 -s 2 0 -P | cut -f 2- > record-1b
    """
    def read_window(record_name, **selection):
        # All reads share the same sample range
        return wfdb.rdsamp(record_name, sampfrom=12500, sampto=40000,
                           **selection)

    local_sig, local_fields = read_window('sample-data/a103l',
                                          channels=[2, 0])
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/target-output/record-1b')

    # Same window, streamed from physiobank
    remote_sig, remote_fields = read_window(
        'a103l', pb_dir='challenge/2015/training', channels=[2, 0])

    # Channels may also be selected by name
    named_sig, named_fields = read_window('sample-data/a103l',
                                          channel_names=['PLETH', 'II'])

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
    assert np.array_equal(local_sig, named_sig)
    assert local_fields == named_fields
def test_2c(self):
    """
    Read record 100_3chan as physical signals, compare it with the stored
    target and check that it survives a write/read round trip.
    """
    original = wfdb.rdsamp('sampledata/100_3chan')
    rounded = np.round(original.p_signals, decimals=8)
    expected = np.genfromtxt('tests/targetoutputdata/target2c')

    # File writing: wrsamp is called with the digital signals populated
    original.d_signals = original.adc()
    original.wrsamp()
    reread = wfdb.rdsamp('100_3chan')
    # Clear the digital signals again before comparing the two records
    original.d_signals = None

    assert np.array_equal(rounded, expected)
    assert original.__eq__(reread)
def test_3a(self):
    """
    Read record s0010_re as digital signals and compare it with the stored
    target, the physiobank copy and a freshly written copy.
    """
    local_record = wfdb.rdsamp('sampledata/s0010_re', physical=False)
    digital = local_record.d_signals
    expected = np.genfromtxt('tests/targetoutputdata/target3a')

    # Same record, streamed from physiobank
    remote_record = wfdb.rdsamp('s0010_re', physical=False,
                                pbdir='ptbdb/patient001')

    # File writing
    local_record.wrsamp()
    reread = wfdb.rdsamp('s0010_re', physical=False)

    assert np.array_equal(digital, expected)
    assert local_record.__eq__(remote_record)
    assert local_record.__eq__(reread)
def ecgread(filename):
    """Read a signal from a file (the ecg and MIT-BIH formats are supported)

    :param filename: file name (for MIT-BIH it is passed without extension)
    :type filename: str
    :return: tuple ``(data, hdr)``: the samples and a header dictionary
    """
    if not filename.endswith(".ecg"):
        data, fields = wfdb.rdsamp(filename)
        # rdsamp returns the signal without offset, in physical units,
        # hence the unit gain and zero baseline reported below
        channel_count = data.shape[1]
        hdr = dict(
            fs=fields["fs"],
            adc_gain=np.array([1.0] * channel_count),
            baseline=np.array([0.0] * channel_count),
            samples=data.shape[0],
            channels=data.shape[1],
        )
        print(fields["sig_name"])
        return data, hdr

    # Native .ecg file: header and samples come from read_buffer
    with open(filename, "rb") as fi:
        hdr, data = read_buffer(fi)
    print(hdr)
    return data, hdr
def test_5b(self):
    """
    Read a short sample range from a multi-segment record and compare the
    rounded physical signals with the stored target.
    """
    segment = wfdb.rdsamp(
        'sampledata/multisegment/s00001/s00001-2896-10-10-00-31',
        sampfrom=14428364,
        sampto=14428375,
    )
    rounded = np.round(segment.p_signals, decimals=8)
    expected = np.genfromtxt('tests/targetoutputdata/target5b')

    np.testing.assert_equal(rounded, expected)
def test_1a(self):
    """
    Read record test01_00s as digital signals and compare it with the
    stored target, the physiobank copy and a written copy whose signal
    names were replaced.
    """
    local_record = wfdb.rdsamp('sampledata/test01_00s', physical=False)
    digital = local_record.d_signals
    expected = np.genfromtxt('tests/targetoutputdata/target1a')

    # Same record, streamed from physiobank
    remote_record = wfdb.rdsamp('test01_00s', physical=False,
                                pbdir='macecgdb')

    # File writing, using a second copy with new signal names
    renamed = wfdb.rdsamp('sampledata/test01_00s', physical=False)
    renamed.signame = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4']
    renamed.wrsamp()
    reread = wfdb.rdsamp('test01_00s', physical=False)

    assert np.array_equal(digital, expected)
    assert local_record.__eq__(remote_record)
    assert renamed.__eq__(reread)
def test_1c(self):
    """
    Read channels 0 and 1 of record a103l from sample 20000 onwards as
    digital signals and compare with the stored target, the physiobank
    copy and a freshly written copy.
    """
    local_record = wfdb.rdsamp('sampledata/a103l', sampfrom=20000,
                               channels=[0, 1], physical=False)
    digital = local_record.d_signals
    expected = np.genfromtxt('tests/targetoutputdata/target1c')

    # Same selection, streamed from physiobank
    remote_record = wfdb.rdsamp('a103l', pbdir='challenge/2015/training',
                                sampfrom=20000, channels=[0, 1],
                                physical=False)

    # File writing
    local_record.wrsamp()
    reread = wfdb.rdsamp('a103l', physical=False)

    assert np.array_equal(digital, expected)
    assert local_record.__eq__(remote_record)
    assert local_record.__eq__(reread)
def test_normalize_bound(self):
    """
    Normalize the first channel of record 100 into [-5, 15] and check that
    the length is kept and the result stays inside the bounds.
    """
    sig, _ = wfdb.rdsamp('sample-data/100')
    lower, upper = -5, 15

    normalized = processing.normalize_bound(sig[:, 0], lower, upper)

    assert normalized.shape[0] == sig.shape[0]
    assert np.min(normalized) >= lower
    assert np.max(normalized) <= upper
def show_objective():
    """
    Plot a 1000-sample ECG fragment together with two label signals built
    from its annotations.

    Three stacked panels share the time axis: the raw channel, a comb with
    a 1 at every annotated sample, and that comb convolved with a 36-point
    Hamming window.
    """
    # Choose a record
    records = dm.get_records()
    path = records[17]
    record = wf.rdsamp(path)
    ann = wf.rdann(path, 'atr')

    chid = 0
    # A single pre-formatted string prints the same text on Python 2 and 3
    print('Channel: %s' % (record.signame[chid],))
    cha = record.p_signals[:, chid]

    # These were found manually
    sta = 184000
    end = sta + 1000
    times = np.arange(end - sta, dtype='float')
    times /= record.fs

    # Extract the annotations strictly inside that fragment and shift them
    # so they index into the fragment
    where = (sta < ann.annsamp) & (ann.annsamp < end)
    samples = ann.annsamp[where] - sta
    print(samples)

    # Prepare dirac-comb type of labels
    qrs_values = np.zeros_like(times)
    qrs_values[samples] = 1

    # Prepare smoothed labels; the kernel is a Hamming window
    # NOTE(review): `ss` is presumably scipy.signal -- confirm that
    # `hamming` is still exposed there in the installed version
    kernel = ss.hamming(36)
    qrs_gauss = np.convolve(kernel, qrs_values, mode='same')

    # Make the plots
    fig = plt.figure()
    ax1 = fig.add_subplot(3, 1, 1)
    ax1.plot(times, cha[sta:end])

    ax2 = fig.add_subplot(3, 1, 2, sharex=ax1)
    ax2.plot(times, qrs_values, 'C1', lw=4, alpha=0.888)

    ax3 = fig.add_subplot(3, 1, 3, sharex=ax1)
    ax3.plot(times, qrs_gauss, 'C3', lw=4, alpha=0.888)

    # Only the bottom panel keeps its tick labels
    plt.setp(ax1.get_xticklabels(), visible=False)
    plt.setp(ax2.get_xticklabels(), visible=False)
    plt.xlabel('Time [s]')
    plt.xlim([0, 2.5])
    plt.show()
def test_2b(self):
    """
    Read samples 1 to 10800 of channel 1 of record 100 as digital signals
    and compare with the stored target, the physiobank copy and a freshly
    written copy.
    """
    local_record = wfdb.rdsamp('sampledata/100', sampfrom=1, sampto=10800,
                               channels=[1], physical=False)
    digital = local_record.d_signals
    expected = np.genfromtxt('tests/targetoutputdata/target2b')
    # The target holds one channel; make it a column
    expected = expected.reshape((len(expected), 1))

    # Same selection, streamed from physiobank
    remote_record = wfdb.rdsamp('100', sampfrom=1, sampto=10800,
                                channels=[1], physical=False,
                                pbdir='mitdb')

    # This comment line was manually added and is not present in the
    # original physiobank record
    del local_record.comments[0]

    # File writing
    local_record.wrsamp()
    reread = wfdb.rdsamp('100', physical=False)

    assert np.array_equal(digital, expected)
    assert local_record.__eq__(remote_record)
    assert local_record.__eq__(reread)
def test_2a(self):
    """
    Format 212, entire signal, physical.

    Target file created with:
        rdsamp -r sample-data/100 -P | cut -f 2- > record-2a
    """
    local_sig, local_fields = wfdb.rdsamp('sample-data/100')
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/target-output/record-2a')

    # Same record, streamed from physiobank
    remote_sig, remote_fields = wfdb.rdsamp('100', pb_dir='mitdb')

    # This comment line was manually added and is not present in the
    # original physiobank record
    del local_fields['comments'][0]

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
def test_4a(self):
    """
    Read the skewed, multi-sample-per-frame record test01_00s_skewframe as
    digital signals, compare it with the stored target and check a
    write/read round trip.
    """
    local_record = wfdb.rdsamp('sampledata/test01_00s_skewframe',
                               physical=False)
    # The WFDB library rdsamp does not return the final N samples for all
    # channels due to the skew. The WFDB python rdsamp does return the
    # final N samples, filling in NANs for end of skewed channels only.
    trimmed = local_record.d_signals[:-3, :]
    expected = np.genfromtxt('tests/targetoutputdata/target4a')

    # File writing with multiple samples per frame and skew: the whole
    # record has to be read with the skew ignored before it is written
    unskewed = wfdb.rdsamp('sampledata/test01_00s_skewframe',
                           physical=False, smoothframes=False,
                           ignoreskew=True)
    unskewed.wrsamp(expanded=True)

    # Read back the record that was just written
    reread = wfdb.rdsamp('test01_00s_skewframe', physical=False)

    assert np.array_equal(trimmed, expected)
    assert local_record.__eq__(reread)
def test_3b(self):
    """
    Multi-dat, selected duration, selected channels, physical.

    Target file created with:
        rdsamp -r sample-data/s0010_re -f 5 -t 38 -P -s 13 0 4 8 3 | cut -f 2- > record-3b
    """
    def read_window(record_name, **extra):
        # Both reads use the same sample range and channel order
        return wfdb.rdsamp(record_name, sampfrom=5000, sampto=38000,
                           channels=[13, 0, 4, 8, 3], **extra)

    local_sig, local_fields = read_window('sample-data/s0010_re')
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/target-output/record-3b')

    # Same window, streamed from physiobank
    remote_sig, remote_fields = read_window('s0010_re',
                                            pb_dir='ptbdb/patient001')

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
def test_1d(self):
    """
    Format 80, selected duration, selected channels, physical

    Target file created with:
        rdsamp -r sample-data/3000003_0003 -f 1 -t 8 -s 1 -P | cut -f 2- > record-1d
    """
    def read_window(record_name, **extra):
        # Both reads use the same sample range and channel
        return wfdb.rdsamp(record_name, sampfrom=125, sampto=1000,
                           channels=[1], **extra)

    local_sig, local_fields = read_window('sample-data/3000003_0003')
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/target-output/record-1d')
    # The target holds one channel; make it a column
    expected = expected.reshape((len(expected), 1))

    # Same window, streamed from physiobank
    remote_sig, remote_fields = read_window(
        '3000003_0003', pb_dir='mimic2wdb/30/3000003/')

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
def show_path(path):
    """
    Plot the first channel of the record at *path* against time in seconds.

    The name of the plotted channel is printed before the plot is shown.
    """
    # Read in the data. Annotations are not needed for this plot, so the
    # record no longer has to come with an 'atr' file.
    record = wf.rdsamp(path)

    data = record.p_signals
    cha = data[:, 0]
    # A single pre-formatted string prints the same text on Python 2 and 3
    print('Channel type: %s' % (record.signame[0],))

    # Sample index divided by the sampling frequency gives seconds
    times = np.arange(len(cha), dtype=float)
    times /= record.fs

    plt.plot(times, cha)
    plt.xlabel('Time [s]')
    plt.show()
def test_resample_multi(self):
    """
    Resample every channel of record 100 to 50 Hz and check the shape of
    the resampled signal.
    """
    sig, fields = wfdb.rdsamp('sample-data/100')
    ann = wfdb.rdann('sample-data/100', 'atr')

    source_fs = fields['fs']
    target_fs = 50

    resampled_sig, resampled_ann = processing.resample_multichan(
        sig, ann, source_fs, target_fs)

    n_expected = int(sig.shape[0] * target_fs / source_fs)

    assert resampled_sig.shape[0] == n_expected
    assert resampled_sig.shape[1] == sig.shape[1]
def test_4d(self):
    """
    Read test01_00s_skewframe without frame smoothing, expand the channels
    to a common length and compare with the stored target.
    """
    record = wfdb.rdsamp('sampledata/test01_00s_skewframe',
                         smoothframes=False)
    chan0, chan1, chan2 = (record.e_p_signals[idx] for idx in range(3))

    # Upsample the channels with lower samples/frame
    expanded = np.zeros((7994, 3))
    expanded[:, 0] = np.repeat(chan0[:-3], 2)
    expanded[:, 1] = chan1[:-6]
    expanded[:, 2] = np.repeat(chan2[:-3], 2)

    rounded = np.round(expanded, decimals=8)
    expected = np.genfromtxt('tests/targetoutputdata/target4d')

    assert np.array_equal(rounded, expected)
def test_2e(self):
    """
    Format 311, selected duration, physical.

    Target file created with:
        rdsamp -r sample-data/3000003_0003 -f 0 -t 8.21 -s 1 | cut -f 2- | wrsamp -o 311derive -O 311
        rdsamp -r 311derive -f 0.005 -t 3.91 -P | cut -f 2- > record-2e
    """
    raw_sig, fields = wfdb.rdsamp('sample-data/311derive', sampfrom=1,
                                  sampto=978)
    rounded = np.round(raw_sig, decimals=8)

    # The target holds one channel of 977 samples; make it a column
    expected = np.genfromtxt('tests/target-output/record-2e')
    expected = expected.reshape((977, 1))

    assert np.array_equal(rounded, expected)
def test_4b(self):
    """
    Read the skewed, multi-sample-per-frame record 03700181 as digital
    signals and compare with the stored target, the physiobank copy and a
    written copy.
    """
    local_record = wfdb.rdsamp('sampledata/03700181', physical=False)
    # The WFDB library rdsamp does not return the final N samples for all
    # channels due to the skew. The WFDB python rdsamp does return the
    # final N samples, filling in NANs for end of skewed channels only.
    trimmed = local_record.d_signals[:-4, :]
    expected = np.genfromtxt('tests/targetoutputdata/target4b')

    # Same record, streamed from physiobank
    remote_record = wfdb.rdsamp('03700181', physical=False,
                                pbdir='mimicdb/037')

    # File writing with multiple samples per frame and skew: the whole
    # record has to be read with the skew ignored before it is written
    unskewed = wfdb.rdsamp('sampledata/03700181', physical=False,
                           smoothframes=False, ignoreskew=True)
    unskewed.wrsamp(expanded=True)

    # Read back the record that was just written
    reread = wfdb.rdsamp('03700181', physical=False)

    assert np.array_equal(trimmed, expected)
    assert local_record.__eq__(remote_record)
    assert local_record.__eq__(reread)
def test_xqrs(self):
    """
    Run xqrs detector on record 100 and compare to reference annotations
    """
    sig, fields = wfdb.rdsamp('sample-data/100', channels=[0])
    reference = wfdb.rdann('sample-data/100', 'atr')
    fs = fields['fs']

    detector = processing.XQRS(sig=sig[:, 0], fs=fs)
    detector.detect()

    # The first reference annotation is left out of the comparison; the
    # third argument is 0.1 s expressed in samples
    comparison = processing.compare_annotations(
        reference.sample[1:], detector.qrs_inds, int(0.1 * fs))

    assert comparison.specificity > 0.99
    assert comparison.positive_predictivity > 0.99
    assert comparison.false_positive_rate < 0.01
def show_annotations(path):
    """
    Example: plot the first 2000 samples of channel 0 of the record at
    *path* with a marker and the annotation code at every annotated sample.
    """
    record = wf.rdsamp(path)
    annotation = wf.rdann(path, 'atr')

    # Data and annotations for the first 2000 samples
    howmany = 2000
    channel = record.p_signals[:howmany, 0]

    # Keep only the annotations that fall inside the plotted range
    inside = annotation.annsamp < howmany
    samp = annotation.annsamp[inside]
    # A numpy array allows boolean-mask indexing of the codes
    types = np.array(annotation.anntype)[inside]

    times = np.arange(howmany, dtype='float') / record.fs
    plt.plot(times, channel)

    # Markers for the annotated samples, drawn above the signal
    qrs_times = times[samp]
    marker_height = np.ones_like(qrs_times) * (channel.max() * 1.4)
    plt.plot(qrs_times, marker_height, 'ro')

    # Annotation codes, written at a fixed height below the markers
    text_height = channel.max() * 1.1
    for sam, code in zip(samp, types):
        plt.annotate(code, xy=(times[sam], text_height))

    plt.xlim([0, 4])
    plt.xlabel('Time [s]')
    plt.show()
def process_one_record(src_file, dest_file, config):
    """
    Read a record, optionally correct its baseline and filter mains
    interference, and write the result as a new record.

    :param src_file: record name passed to wfdb.rdsamp
    :param dest_file: record name passed to wfdb.wrsamp
    :param config: nested mapping; reads "PREPROCESSING" (switches
        "baseline_correction" and "mains_correction", both on by default),
        "BASELINE" and "MAINS_FILTER"
    """
    data, fields = wfdb.rdsamp(src_file)
    fs = fields["fs"]
    channel_count = data.shape[1]

    # rdsamp output is used with unit gain and zero baseline
    hdr = dict(
        fs=fs,
        adc_gain=np.array([1.0] * channel_count),
        baseline=np.array([0.0] * channel_count),
        samples=data.shape[0],
        channels=data.shape[1],
    )

    # Step 1: baseline correction, or a float copy of the input
    if config["PREPROCESSING"].get("baseline_correction", True):
        window_ms = config["BASELINE"]["unbias_window_ms"]
        unbiased = fix_baseline(data, fs, window_ms)
    else:
        unbiased = np.array(data, "float")

    # Step 2: mains filter, or a copy of the previous step
    if config["PREPROCESSING"].get("mains_correction", True):
        mains_cfg = config["MAINS_FILTER"]
        cleaned = mains_filter(
            unbiased,
            fs=fs,
            bias=hdr["baseline"],
            mains=mains_cfg["base_freq"],
            attenuation=mains_cfg["attenuation"],
            aperture=mains_cfg["fft_size"],
        )
    else:
        cleaned = unbiased.copy()

    # Storage format falls back to "16" per channel when the source header
    # does not provide one
    default_fmt = ["16"] * cleaned.shape[1]
    wfdb.wrsamp(
        dest_file,
        fs=fs,
        units=fields["units"],
        sig_name=fields["sig_name"],
        comments=fields["comments"],
        p_signal=cleaned,
        fmt=fields.get("fmt", default_fmt),
    )
def test_4c(self):
    """
    Read a selected sample range and channels 0 and 2 of the skewed record
    03700181 with srdsamp and compare with the stored target, the
    physiobank copy and a written copy.
    """
    def read_window(record_name, **extra):
        # Every srdsamp call uses the same channels and sample range
        return wfdb.srdsamp(record_name, channels=[0, 2], sampfrom=1000,
                            sampto=16000, **extra)

    local_sig, local_fields = read_window('sampledata/03700181')
    rounded = np.round(local_sig, decimals=8)
    expected = np.genfromtxt('tests/targetoutputdata/target4c')

    # Same window, streamed from physiobank
    remote_sig, remote_fields = read_window('03700181',
                                            pbdir='mimicdb/037')

    # File writing with multiple samples per frame and skew: the whole
    # record has to be read with the skew ignored before it is written
    unskewed = wfdb.rdsamp('sampledata/03700181', physical=False,
                           smoothframes=False, ignoreskew=True)
    unskewed.wrsamp(expanded=True)

    # Read back the record that was just written
    written_sig, written_fields = read_window('03700181')

    assert np.array_equal(rounded, expected)
    assert np.array_equal(local_sig, remote_sig)
    assert local_fields == remote_fields
    assert np.array_equal(local_sig, written_sig)
    assert local_fields == written_fields
def test_correct_peaks(self):
    """
    Correct the annotated peaks of record 100 and compare the corrected
    positions inside the first 10000 samples with known values.
    """
    sig, fields = wfdb.rdsamp('sample-data/100')
    ann = wfdb.rdann('sample-data/100', 'atr')
    fs = fields['fs']

    # Beat-to-beat gaps, in samples, for the slowest and fastest rates
    slowest_bpm, fastest_bpm = 10, 350
    widest_gap = fs * 60 / slowest_bpm
    narrowest_gap = fs * 60 / fastest_bpm

    corrected = processing.correct_peaks(
        sig=sig[:, 0],
        peak_inds=ann.sample,
        search_radius=int(narrowest_gap),
        smooth_window_size=150,
    )

    # Mark the corrected peaks in an indicator signal, then read back the
    # marked positions among the first 10000 samples
    indicator = np.zeros(sig.shape[0])
    indicator[corrected] = 1
    found = np.where(indicator[:10000] == 1)[0]

    expected_peaks = [
        77, 370, 663, 947, 1231, 1515, 1809, 2045, 2403, 2706, 2998, 3283,
        3560, 3863, 4171, 4466, 4765, 5061, 5347, 5634, 5919, 6215, 6527,
        6824, 7106, 7393, 7670, 7953, 8246, 8539, 8837, 9142, 9432, 9710,
        9998,
    ]

    assert np.array_equal(found, expected_peaks)
# Build two labelled sample matrices: one row per record, holding the first
# `sampsize` samples of channel 0 followed by a class label in the last
# column (1 for the non-control patients, 0 for the controls).
sampsize = 10000
rate = 1000

# Define text arrays
patients = (np.loadtxt(fname=fpath, dtype=str)).tolist()
controls = (np.loadtxt(fname=cpath, dtype=str)).tolist()

# Take control patient array away from full patient array
mipatients = (set(patients) - set(controls))
misize = len(mipatients)
csize = len(controls)

# Allocate memory for the two databases (samples plus one label column)
midatabase = np.zeros(shape=(misize, (sampsize + 1)))
cdatabase = np.zeros(shape=(csize, (sampsize + 1)))

i = 0
for p in mipatients:
    sig = (wf.rdsamp(dpath + str(p), sampto=sampsize, pbdl=0,
                     channels=[0])[0])
    # Append the class label; the row width follows sampsize instead of a
    # separate hard-coded 10001
    nsig = np.append(sig, [1])
    nsig.shape = (1, sampsize + 1)
    midatabase[i] = nsig
    i = i + 1

n = 0
for c in (controls):
    sig = (wf.rdsamp(dpath + str(c), sampto=sampsize, pbdl=0,
                     channels=[0])[0])
    nsig = np.append(sig, [0])
    nsig.shape = (1, sampsize + 1)
    cdatabase[n] = nsig
    n = n + 1

# Function-call form prints the same text on Python 2 and 3
print(midatabase)
print(cdatabase)
import wfdb
from biosppy.signals import ecg
import h5py
import numpy as np
import sys
import os

# Split the first channel of the record named on the command line into
# 10-second segments and store each segment in its own HDF5 file under
# ../data_10s/<record name>/, together with its start index and an 'SNR'
# label.
filename = sys.argv[1]
signal, fields = wfdb.rdsamp(filename)
annotation = wfdb.rdann(filename, 'atr')
fs = fields['fs']  # sample rate
MLII_raw = signal[:, 0]

is_noise = False
every_two_minute = 0

# Records with a three-character name (118, 119) are labelled clean
# throughout
record_name = filename.split('/')[-1]
all_clean = len(record_name) == 3

# makedirs also creates a missing parent directory and does not fail when
# the script is run again for the same record
filename_prefix = '../data_10s/' + record_name + '/'
os.makedirs(filename_prefix, exist_ok=True)

# range() needs an integer step even when the header stores fs as a float
segment_len = int(fs * 10)

for i in range(0, len(MLII_raw), segment_len):
    with h5py.File(filename_prefix + str(i) + '.h5', 'w') as f:
        # After the first five minutes the label toggles every two minutes
        if (i - 5 * 60 * fs) >= (2 * 60 * fs) * every_two_minute and not all_clean:
            is_noise = not is_noise
            every_two_minute += 1
        f.create_dataset('index', data=i)
        f.create_dataset('MLII', data=MLII_raw[i:i + segment_len])
        # While noisy, the label is the last two characters of the path
        f.create_dataset('SNR', data=(filename[-2:] if is_noise else 'clean'))
annot_positions) annotated_rr_segments = create_rr_annotation(annotated_r_peaks) # numpy.set_printoptions(suppress=True) # print annotated_rr_segments numpy.savetxt(QRS_LOCATION + record_id + '_RR.txt', annotated_rr_segments, delimiter=",") if __name__ == "__main__": if len(sys.argv) != 2: print 'Please provide a vfdb record number' exit() arg1 = str(sys.argv[1]) if arg1 == 'all': for record_id in VFDB_RECORDS: record = str(record_id) qrs = load_qrs_i(record) print qrs.size sig, fields = wfdb.rdsamp('./vfdb/' + record, channels=[0]) xqrs = processing.XQRS(sig=sig[:, 0], fs=fields['fs']) xqrs.detect() print xqrs.qrs_inds.size print "\n" #save_rr_segment_to_csv(str(record_id)) else: record_id = arg1
def dataset_making(sig):
    """
    Build one feature row per annotated beat of an INCART record.

    Reads the 'atr' annotations and channel 5 of record 'INCART/<sig>',
    passes the samples through `preprocessing`, and for every annotation
    whose sample index is found in the processed signal collects the
    amplitude, the neighbouring amplitudes, the interval to the next beat
    and the heart rates around it.

    Returns a list of rows
    [amplitude, previous amplitude, next amplitude, RR, previous HR, HR,
    next HR, dtypes(symbol)]; index 0 of the intermediate dataset is never
    emitted.

    NOTE(review): the nesting of the main loop below is an assumption;
    confirm that all of its statements belong inside the inner k-loop. If
    they do, every beat is appended once per element of its row.
    """
    signal = []
    dataset = []
    raw = []
    ann = wfdb.rdann('INCART/' + sig, 'atr')
    # From here on `sig` holds the sample array, not the record name
    sig, fields = wfdb.rdsamp('INCART/' + sig, channels=[5])
    # Flatten the single selected channel into a plain list
    for g in sig:
        raw.append(g[0])
    # The sampling rate is taken from the annotation object
    samplerate = ann.fs
    signew = preprocessing(raw, samplerate)
    # cd: [sample index, symbol] for every annotation
    cd = []
    for w in range(len(ann.sample)):
        types = ann.symbol[w]
        array = [ann.sample[w], types]
        cd.append(array)
    # signal: [index, value] for every processed sample
    for k in range(len(signew)):
        sgt = signew[k]
        temps = [k, sgt]
        signal.append(temps)
    A = cd
    B = signal
    # Look-up from sample index to its [index, value] pair
    B_Dict = {b[0]: b for b in B}
    # Join annotations with samples: [index, value, symbol]; annotations
    # whose index is not in the look-up are dropped
    array_new = [[B_Dict.get(a[0])[0],
                  B_Dict.get(a[0])[1], a[1]] for a in A
                 if B_Dict.get(a[0])]
    for j in range(len(array_new)):
        for k in range(len(array_new[j])):
            amplitude = array_new[j][1]
            d1 = array_new[j][0]
            # NOTE(review): this condition is also true for j == 0 when
            # there is only one beat, where array_new[j + 1] does not exist
            if (j == 0 or j != (len(array_new) - 1)):
                d2 = array_new[j + 1][0]
                distance = d2 - d1
                # Interval to the next beat in seconds, rate in beats/min
                RR = (distance / samplerate)
                HR = int(60 / RR)
            else:
                RR = 0
                HR = 0
            if (j == 0):
                toward = array_new[j + 1][1]
                toback = 0
            # NOTE(review): this is a separate `if`, so for j == 0 the
            # `else` branch runs as well and replaces toback with
            # array_new[-1][1]; an `elif` may have been intended
            if (j == (len(array_new) - 1)):
                toward = 0
                toback = array_new[j - 1][1]
            else:
                toward = array_new[j + 1][1]
                toback = array_new[j - 1][1]
            class_data = (array_new[j][2])
            temp = [amplitude, toback, toward, RR, HR, class_data]
            dataset.append(temp)
    dtn = []
    amp = []
    bk = []
    fr = []
    rrt = []
    hr = []
    cld = []
    hrb = []
    hra = []
    # Split the rows into one list per column
    for h in range(len(dataset)):
        amp.append(dataset[h][0])
        bk.append(dataset[h][1])
        fr.append(dataset[h][2])
        rrt.append(dataset[h][3])
        hr.append(dataset[h][4])
        cld.append(dataset[h][5])
    # hrb / hra: heart rate of the previous / next row, 0 at the ends
    for g in range(len(hr)):
        if (g == (len(hr) - 1)):
            hrb.append(hr[g - 1])
            hra.append(0)
        elif (g == 0):
            hra.append(hr[g + 1])
            hrb.append(0)
        else:
            hra.append(hr[g + 1])
            hrb.append(hr[g - 1])
    # The increment shifts the index by one, so rows 1 .. len(hr) - 1 are
    # emitted and row 0 is skipped
    for c in range(len(hr) - 1):
        c += 1
        typenew = [
            amp[c], bk[c], fr[c], rrt[c], hrb[c], hr[c], hra[c],
            dtypes(cld[c])
        ]
        dtn.append(typenew)
    return dtn
for i in range(15): new = string.split("f") if int(new[2]) < 9: string_2 = new[:2] + list(str(0)) string_3 = list("f".join(str(e) for e in string_2[:3])) + list( str(int(new[2]) + 1)) string = "".join(str(e) for e in string_3) else: string_2 = new[:2] + list(str(int(new[2]) + 1)) string_3 = list("f".join(str(e) for e in string_2[:3])) + list( string_2[3]) string = "".join(str(e) for e in string_3) print(string) record[i] = wfdb.rdsamp(string, sampto=1000) sig[i] = record[i].p_signals #wfdb.plotrec(record[i], title=string) #for j in range(len(sig[i])): #data[i].append(sig[i][j][0]) mini = x.append(len(sig[i])) n = len(sig[i]) # total number of samples T = n / fs t = np.linspace(0, T, n, endpoint=False) # HPF y = butter_highpass_filter(data[i], cutoff, fs, order)
import wfdb
import urllib.request
import matplotlib.pyplot as plt

# List of records: ask wfdb for the record names of the 'mitdb' database
records = wfdb.io.get_record_list(db_dir='mitdb', records='all')

# In[6]:

# Dataframe of voltages: one column per record, holding the first 648000
# samples of channel 0
list_volt_0 = []
for i in records:
    # NOTE(review): `image` is not used anywhere in the visible code, and
    # the rdsamp call below requests the same data again -- confirm whether
    # a later cell needs it
    image = wfdb.rdrecord(i, pb_dir='mitdb', sampto=648000, channels=[0])
    signals_array, fields_dictionary = wfdb.rdsamp(i, pb_dir='mitdb', sampto=648000, channels=[0])
    # NOTE(review): `pd` is not imported in the visible part of the file
    voltages = pd.DataFrame(signals_array)
    list_volt_0.append(voltages)

# Put the per-record columns side by side and name them after the records
df_volt_0 = pd.concat(list_volt_0, axis=1)
df_volt_0.columns = records
# The index is the row position within each record, exposed as 'Time'
df_volt_0.index.name = 'Time'
df_volt_0 = df_volt_0.reset_index()
# Wide to long: one row per (Time, Patient) pair with its voltage
df_volt_0 = pd.melt(df_volt_0, id_vars='Time', var_name="Patient", value_name="Voltage_L2")
df_volt_0

# In[7]:
def predictBeat(signal):
    """
    Classify the beats in the first 9000 samples of a record and report
    the dominant beat class and the heart rate.

    Channel 0 of the record named *signal* is read, peaks are detected on
    the raw samples, and the samples go through three filter stages. For
    every peak, auto-regressive coefficients of a QRS window and of the
    window after it, plus the distances to the neighbouring peaks, form a
    feature row. The rows are scaled and classified with the module-level
    `clf`.

    Side effects: writes 'images\\pie.jpg' with the class distribution and
    one 'images\\<row>.jpg' per beat whose predicted class is not 0, prints
    the summary, and stores it in the module-level `ans` mapping.

    :param signal: record name passed to wfdb.rdsamp
    :return: the module-level `ans`, with 'beats' and 'rate' updated
    """
    try_signal, _ = wfdb.rdsamp(signal, channels=[0], sampfrom=0,
                                sampto=9000)
    try_signal = try_signal.flatten()
    # Peak detection runs on the unfiltered samples
    indexes = peakutils.indexes(try_signal, thres=0.5, min_dist=30)

    # NOTE(review): scipy.signal.butter returns (numerator, denominator)
    # and lfilter takes the numerator first, so the three stages below pass
    # the coefficients in swapped order; they are also designed with
    # analog=True and then applied as digital filters. Left unchanged
    # because `clf` was presumably fitted on features produced by exactly
    # this chain -- confirm before correcting.
    a, b = scipy.signal.butter(3, 1 / 180, btype='highpass', analog=True)
    fil1 = scipy.signal.lfilter(b, a, try_signal)
    c, d = scipy.signal.butter(3, [58 / 180, 62 / 180], btype='bandstop',
                               analog=True)
    fil2 = scipy.signal.lfilter(d, c, fil1)
    e, f = scipy.signal.butter(4, 25 / 180, btype='lowpass', analog=True)
    filtered = scipy.signal.lfilter(f, e, fil2)

    # Heart rate: detected peaks per minute, with 360 samples per second
    minutes = (len(filtered) / (360 * 60))
    heart_rate = len(indexes) / minutes

    globalList = list()
    for i in range(len(indexes)):
        to_append = list()
        r_peak = indexes[i]
        # QRS window around the peak, followed by the ST-T window
        qrs_start = r_peak - int(len_qrs / 2) + 1
        qrs_segment = filtered[qrs_start:qrs_start + len_qrs]
        stt_segment = filtered[qrs_start + len_qrs + 1:qrs_start + len_qrs +
                               len_stt]

        # AR coefficients of QRS; the 'nan' strings mark an empty window
        if len(qrs_segment) > 0:
            _, qrs_arcoeffs, _, _, _ = levinson_durbin(
                qrs_segment, nlags=autoreg_ord, isacov=False)
        else:
            qrs_arcoeffs = ['nan'] * 9

        # AR coefficients of STT
        if len(stt_segment) > 0:
            _, stt_arcoeffs, _, _, _ = levinson_durbin(
                stt_segment, nlags=autoreg_ord, isacov=False)
        else:
            stt_arcoeffs = ['nan'] * 9

        # Pre RR and Post RR length; None at the first / last peak
        if i > 0:
            pre_rr = indexes[i] - indexes[i - 1]
        else:
            pre_rr = None
        if i + 1 < len(indexes):
            post_rr = indexes[i + 1] - indexes[i]
        else:
            post_rr = None

        to_append.append(qrs_start)
        to_append.append(qrs_start + len_qrs + len_stt)
        to_append.append(pre_rr)
        to_append.append(post_rr)
        to_append.extend(qrs_arcoeffs)
        to_append.extend(stt_arcoeffs)

        # Keep only complete rows
        if None not in to_append and 'nan' not in to_append:
            globalList.append(to_append)

    df = pd.DataFrame(globalList,
                      columns=[
                          'start', 'end', 'pre_rr', 'post_rr', 'arqrs1',
                          'arqrs2', 'arqrs3', 'arqrs4', 'arqrs5', 'arqrs6',
                          'arqrs7', 'arqrs8', 'arqrs9', 'arstt1', 'arstt2',
                          'arstt3', 'arstt4', 'arstt5', 'arstt6', 'arstt7',
                          'arstt8', 'arstt9'
                      ])

    # Everything except the window bounds is a classifier feature
    test = df[[
        'pre_rr', 'post_rr', 'arqrs1', 'arqrs2', 'arqrs3', 'arqrs4',
        'arqrs5', 'arqrs6', 'arqrs7', 'arqrs8', 'arqrs9', 'arstt1',
        'arstt2', 'arstt3', 'arstt4', 'arstt5', 'arstt6', 'arstt7',
        'arstt8', 'arstt9'
    ]]
    test = scale(test)
    predictions = clf.predict(test)
    predictions = list(predictions)

    # Count the predicted classes and replace the numeric codes by names.
    # Codes are handled in ascending order, which fixes the order of the
    # keys and therefore of the pie slices.
    pred_dict = dict(Counter(predictions))
    class_names = (
        (0, 'Normal'),
        (1, 'Premature Beats'),
        (2, 'Escape Beats'),
        (3, 'Fusion Beats'),
        (4, 'Unrecognized'),
        (5, 'Bundled Branch Block Beat'),
    )
    for code, name in class_names:
        if code in pred_dict.keys():
            pred_dict[name] = pred_dict.pop(code)

    fig = matplotlib.pyplot.figure(figsize=(10, 7))
    matplotlib.pyplot.pie(pred_dict.values(),
                          labels=pred_dict.keys(),
                          autopct='%1.0f%%')
    # The backslash is escaped explicitly; the path itself is unchanged
    matplotlib.pyplot.savefig('images\\pie.jpg', bbox_inches='tight')
    matplotlib.pyplot.close()

    # One image for every beat that was not classified as class 0
    for ind in df.index:
        if predictions[ind] != 0:
            sig = filtered[df['start'][ind]:df['end'][ind]]
            matplotlib.pyplot.axis('off')
            matplotlib.pyplot.plot(sig)
            matplotlib.pyplot.savefig('images\\' + str(ind) + '.jpg',
                                      bbox_inches='tight')
            matplotlib.pyplot.close()

    # Most frequent class
    keymax = max(pred_dict, key=pred_dict.get)
    print('Your maximum beats are: ', keymax)
    print('Heart Rate: ', heart_rate)
    ans['beats'] = keymax
    ans['rate'] = int(heart_rate)
    return ans
#BR, BC1, BC2, BI1, BI2, BI3, BI4, BI5, PC1, PC2, PR1, PR2 #delinDB = [[], [], [], [], [], [], [], [], [], [], [], []] #%% # Open file to save the results outFile = h5py.File(fName, 'w') # Process one patient at a time for patient in range(1, len(annot)): print('Processing patient', patient) # Process BR, BC1 and BC2 for measurement in range(0, 3): rec = annot[patient][measurement] # Find the name of the record if rec != '': # Check that the record exists try: s, att = wfdb.rdsamp(rec.zfill(4), pn_dir=dbase) # Read from Physionet # Calculate augmented limb leads and append them to the signals aVR, aVL, aVF = ecg.augmentedLimbs(s[:, -3], s[:, -2]) s = np.concatenate((s, aVR, aVL, aVF), axis=1) # Delineate all the ECG leads using the WT and fusion techniques ECGdelin = ecg.delineateMultiLeadECG(s, att['fs']) # Create subgroup for the patient and save all the leads grp = outFile.create_group(tests[measurement] + '/' + str(patient).zfill(3)) for idx, ECG in enumerate(ECGdelin): dsetName = leadNames[idx] dset = grp.create_dataset(dsetName, ECG.shape, ECG.dtype) dset[...] = ECG #dset.attrs.create(h5attr[0],h5attr[1])
def EMD_data_preparation(filepath,patient_data,csv_folder,samplenumber,split_perc):
    """
    Cut ECG records into fixed-length windows and write them, labelled
    with the diagnosis, to a training and a test CSV file.

    filepath     -- prefix joined with '<folder>/<record>' to locate a record
    patient_data -- text file with one '<folder>/<record>...' entry per line
    csv_folder   -- prefix for 'Original_train.csv' and 'Original_test.csv'
    samplenumber -- number of samples per window
    split_perc   -- fraction of a record's windows that go to training

    For every record the diagnosis is taken from the fifth header comment
    (from character 22 on); records labelled 'n/a' are skipped. Each
    window row holds the per-sample sum of the 15 channels followed by the
    diagnosis. The step between windows depends on the diagnosis.

    NOTE(review): `problem_data` is closed at the end but never defined in
    this function, and Original_train / Original_test are never closed.
    NOTE(review): a diagnosis that matches none of the branches below
    leaves `overlap` and `repetition` at the values of the previous record
    (or undefined for the first one).
    NOTE(review): the block nesting shown here is an assumption; confirm
    it against the original file.
    """
    # Diagnoses that are merged into two broader classes
    miscle=['Stable angina','Palpitation', 'Unstable angina']
    cardiom=['Heart failure (NYHA 4)', 'Heart failure (NYHA 3)', 'Heart failure (NYHA 2)']
    ecg_lead = ['i','ii','iii','avr','avl','avf','v1','v2','v3','v4','v5','v6','vx','vy','vz']
    Original_train = open(csv_folder+'Original_train.csv', 'w')
    Original_test = open(csv_folder+'Original_test.csv', 'w')
    f = open(patient_data)
    line = f.readline()
    disease_array=[]
    Total_data = 0
    while line:
        # '<folder>/<record>...': the record name is the first 8
        # characters after the slash
        splitted = line.split('/')
        file_name = str(splitted[1][0:8])
        patient_folder = str(splitted[0])
        total_path = filepath+patient_folder+'/'+file_name
        # The trailing comma keeps the diagnosis printed below on this line
        print patient_folder,'---',file_name,
        try:
            signal,ecgrecord = wfdb.rdsamp(total_path)
            print ecgrecord['comments'][4][22:]
            signal_length = len(signal)
            if not ecgrecord['comments'][4][22:] == 'n/a':
                disease = ecgrecord['comments'][4][22:]
                if disease in miscle:
                    disease = "Miscellaneous"
                elif disease in cardiom:
                    disease = "Cardiomyopathy"
                # Remember every class that was seen
                if disease not in disease_array:
                    disease_array.append(disease)
                # samplelength is the start index of the current window
                samplelength = 0
                undecomposed = 0
                not_match = 0
                # `overlap` is the step between window starts and
                # `repetition` the number of windows taken from the record
                if disease == 'Myocardial infarction':
                    overlap = 1000
                    repetition = int(math.floor(signal_length/samplenumber))
                elif disease == 'Healthy control':
                    overlap = 220
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Cardiomyopathy':
                    overlap = 45
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Bundle branch block':
                    overlap = 46
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Dysrhythmia':
                    overlap = 30
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Hypertrophy':
                    overlap = 20
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Valvular heart disease':
                    overlap = 9
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Myocarditis':
                    overlap = 11
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                elif disease == 'Miscellaneous':
                    overlap = 55
                    repetition = int(math.floor(((signal_length-samplenumber)/overlap) + 1))
                # The first `stop` windows go to training, the rest to test
                stop = int(math.ceil(repetition*split_perc))
                # ---------- Training and test data splitting ----------
                # Training data preparation
                for j in range(0,stop):
                    write_signal = []
                    for sample in range(samplelength,samplelength+samplenumber):
                        # Sum of the 15 channels at this sample
                        ecg_signal = 0
                        for i1 in range(0,15):
                            ecg_signal = ecg_signal+signal[sample][i1]
                        write_signal.append(ecg_signal)
                    Original_signal = np.asarray(write_signal)
                    try:
                        Total_data = Total_data+1
                        # First value unrounded, the others with 3 decimals
                        string = str(Original_signal[0])
                        for h in range(1,samplenumber):
                            string = string +','+str(float("{0:.3f}".format(Original_signal[h])))
                        string = string+','+disease+'\n'
                        Original_train.write(string)
                        samplelength = samplelength+overlap
                    except:
                        # Move on to the next window even when this row
                        # could not be written
                        print 'Could not Write'
                        samplelength = samplelength+overlap
                # Testing data preparation
                for j in range(stop,repetition):
                    write_signal = []
                    for sample in range(samplelength,samplelength+samplenumber):
                        ecg_signal = 0
                        for i1 in range(0,15):
                            ecg_signal = ecg_signal+signal[sample][i1]
                        write_signal.append(ecg_signal)
                    Original_signal = np.asarray(write_signal)
                    try:
                        Total_data = Total_data+1
                        # NOTE(review): test rows use 6 decimals while
                        # training rows use 3 -- confirm this is intended
                        string = str(Original_signal[0])
                        for h in range(1,samplenumber):
                            string = string +','+str(float("{0:.6f}".format(Original_signal[h])))
                        string = string+','+disease+'\n'
                        Original_test.write(string)
                        samplelength = samplelength+overlap
                    except:
                        print 'Could not write'
                        samplelength = samplelength+overlap
            line = f.readline()
        except:
            # Any failure for this record is reported and the loop goes on
            # with the next line
            line = f.readline()
            print sys.exc_info(),'\n'
    f.close()
    problem_data.close()
    print disease_array
    print Total_data
import wfdb
import matplotlib.pyplot as plt

# Alternative kept for reference: read the record and its annotations as
# objects and let wfdb draw them.
#record = wfdb.rdrecord('ecg-id-database-1.0.0/Person_01/rec_1', sampto=3000)
#annotation = wfdb.rdann('ecg-id-database-1.0.0/Person_01/rec_1', 'atr', sampto=3000)
#wfdb.plot_wfdb(record=record, annotation=annotation, plot_sym=True,
#               time_units='seconds', title='Person 1, Record 1')

# Read the first 1000 samples of the record and plot its second channel
signals, fields = wfdb.rdsamp('ecg-id-database-1.0.0/Person_01/rec_1', sampto=1000)
print(signals)
print(signals[:, 1])

# The x axis is labelled in seconds, so plot against time and not against
# the sample index: sample number divided by the sampling frequency
time_s = [sample / fields['fs'] for sample in range(len(signals))]

plt.figure(1)
fig = plt.plot(time_s, signals[:, 1])
plt.ylabel('mV')
plt.xlabel('seconds')
plt.show()
def read_data():
    """Load every record in ``data/`` and return a class-balanced beat set.

    Each ``data/*.dat`` record is read (channel 0) together with its ``atr``
    annotations. An annotation is forwarded to ``add_to_data`` when its symbol
    is a key of the module-level ``count`` dict, or when it is a ``'+'``
    annotation whose NUL-stripped ``aux_note`` is a key of ``count``.
    ``count`` is incremented for every forwarded annotation.

    Afterwards every non-empty class in the module-level ``beats_data`` dict is
    brought to exactly ``threshold`` (5000) examples: larger classes are
    shuffled and truncated, smaller ones are repeated, shuffled and truncated.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds the flattened examples and ``y``
        the matching class keys, both as numpy arrays.
    """
    # read all files in data dir
    for dat_path in glob.glob('data/*.dat'):
        print(dat_path)
        record = dat_path[:-4]  # drop the '.dat' suffix
        signals, _ = wfdb.rdsamp(record, channels=[0])
        annotation = wfdb.rdann(record, 'atr')
        beats = list(annotation.sample)
        for i, symbol in enumerate(annotation.symbol):
            if symbol in count:
                label = symbol
            elif symbol == '+':
                # Rhythm changes carry their label in the aux_note field.
                label = annotation.aux_note[i].strip('\x00')
                if label not in count:
                    continue
                if label in ('(VFL', '(BII'):
                    print('Found')
            else:
                continue
            count[label] += 1
            add_to_data(signals, i, label, beats)
    print('done')

    threshold = 5000
    total = 0
    new_data = {}
    for key, pair in beats_data.items():
        print(key + ' -count is: ' + str(len(pair)))
        total += len(pair)
        if not pair:
            # Nothing to resample from; repeating an empty list would never
            # reach the threshold.
            continue
        if len(pair) >= threshold:
            random.shuffle(pair)
            pool = pair
        else:
            # Oversample by repetition until at least `threshold` examples.
            pool = []
            while len(pool) < threshold:
                pool.extend(pair)
            random.shuffle(pool)
        p = np.array(pool[:threshold])
        nsamples, nx, ny = p.shape
        new_data[key] = p.reshape((nsamples, nx * ny))
    print('Total - ' + str(total))

    X = []
    y = []
    for key, rows in new_data.items():
        X.extend(rows)
        y.extend([key] * len(rows))
    return np.array(X), np.array(y)
# Accumulators filled while walking every person's recordings.
labels_all = []
segs_all = []
segs_allqual = []
for i in Person_IDs:
    # Folder names use a two-character ID: single-digit IDs get a leading '0'.
    curr_ID_str = str(i)
    if i < 10:
        curr_ID_str = '0' + curr_ID_str
    curr_foldername = 'Person_' + curr_ID_str
    curr_filenames = glob.glob(curr_foldername + "/*.dat")
    j = 0
    for one_filename in curr_filenames:
        # One rec has many pulses
        filename = one_filename[:-4]  # strip the '.dat' extension
        sig, fields = wfdb.rdsamp(filename)#, sampto=1000) #, pbdl=0)
        #print fields
        fs = fields['fs']
        sig_use = sig[:,1]  # second channel of the record
        dt = 1./fs
        seg_len = SEG_LEN_MS/1000. / (1./fs) # unit: num of bins
        #print seg_len
        # Debug switch: plot one specific recording when enabled.
        TO_PLOT_ORIGIN_SIG = False
        if TO_PLOT_ORIGIN_SIG and 'Person_01/rec_7' in filename:
            ts = np.linspace(0,dt*len(sig_use),len(sig_use))
            fig = plt.figure(figsize=(8,6))
            # NOTE(review): plots channel 0 although sig_use is channel 1 - confirm intent.
            plt.plot(ts, sig[:,0])
            #my_funcs.config_plot('Time /s', 'Amplitude /mV')
def test_2d(self):
    """
    Digital samples of record 310derive, read from sample 2 onwards, must
    equal the stored target output.
    """
    # wfdb.rdsamp returns a (signal, fields) tuple of physical values and has
    # no `physical` switch; digital samples are read through wfdb.rdrecord.
    # NOTE(review): other tests in this file read from 'sample-data/' and
    # 'tests/target-output/'; confirm these directory names still exist.
    record = wfdb.rdrecord('sampledata/310derive', sampfrom=2,
                           physical=False)
    sig = record.d_signal
    targetsig = np.genfromtxt('tests/targetoutputdata/target2d')

    assert np.array_equal(sig, targetsig)
import matplotlib.pyplot as plt

directory = 'F:/Foldery/Nauka/Python/ekg/patient1/s0010_re'
ecg_record = wfdb.io.rdheader(directory)

# Per-signal header fields printed below, in this order:
#   file_name, fmt (format), adc_gain, adc_res (ADC resolution), adc_zero,
#   init_value, checksum, block_size, sig_name (description)

# Record line: name, number of signals, sampling frequency, signal length.
print(' '.join([ecg_record.record_name,
                str(ecg_record.n_sig),
                str(int(ecg_record.fs)),
                str(ecg_record.sig_len)]))

# One line per signal.
for k in range(ecg_record.n_sig):
    print(' '.join([ecg_record.file_name[k],
                    ecg_record.fmt[k],
                    str(int(ecg_record.adc_gain[k])),
                    str(ecg_record.adc_res[k]),
                    str(ecg_record.adc_zero[k]),
                    str(ecg_record.init_value[k]),
                    str(ecg_record.checksum[k]),
                    str(ecg_record.block_size[k]),
                    ecg_record.sig_name[k]]))

# Plot the first 2000 samples of the first two channels against sample index.
signals, fields = wfdb.rdsamp(directory, sampfrom=0, sampto=2000, channels=[0, 1])
N = len(signals)
plt.plot(np.arange(N), signals)
def extract_features(record_path, length_qrs, length_stt, ar_order_qrs, ar_order_stt,
                     sampfrom=0, sampto=-1, use_filter=True):
    """Extract per-beat features from channel 0 of a WFDB record.

    The signal and its ``atr`` annotations are read between ``sampfrom`` and
    ``sampto``. For every beat annotated ``"N"``, ``"S"`` or ``"V"`` a QRS
    window of ``length_qrs`` samples around the R peak and the following
    ST/T window are cut out, and autoregressive coefficients are estimated
    for each non-empty window with ``levinson_durbin``.

    Args:
        record_path: Path of the record, without extension.
        length_qrs: Length of the QRS window in samples.
        length_stt: Offset (from the QRS window start plus ``length_qrs``) at
            which the ST/T window ends, in samples.
        ar_order_qrs: AR order (``nlags``) for the QRS window.
        ar_order_stt: AR order (``nlags``) for the ST/T window.
        sampfrom: First sample to read.
        sampto: Sample to stop at; ``None`` or a negative value reads to the
            end of the record.
        use_filter: When true, apply the three ``butter_filter`` stages
            (high-pass 1, band-stop 58-62, low-pass 25) before windowing.

    Returns:
        list: One ``OrderedDict`` per kept beat with the keys ``"record"``,
        ``"type"`` (1 for ``"V"``, otherwise 0), ``"pre-RR"``, ``"post-RR"``
        (``None`` at the record edges) followed by ``"qrs_ar<i>"`` and
        ``"stt_ar<i>"`` entries. Coefficient entries are omitted for a window
        that turned out empty.
    """
    print(record_path)
    qrs_stt_rr_list = list()

    # A missing or negative `sampto` means "read to the end of the record".
    read_to = None if (sampto is None or sampto < 0) else sampto
    raw_signal, fields = wfdb.rdsamp(record_path, channels=[0],
                                     sampfrom=sampfrom, sampto=read_to)
    annotations = wfdb.rdann(record_path, extension="atr",
                             sampfrom=sampfrom, sampto=read_to)
    raw_signal = raw_signal.reshape(-1)
    sampling_freq = fields['fs']

    # Filtering
    if use_filter:
        filter_1 = butter_filter(raw_signal, filter_type="highpass", order=3,
                                 cutoff_freqs=[1], sampling_freq=sampling_freq)
        filter_2 = butter_filter(filter_1, filter_type="bandstop", order=3,
                                 cutoff_freqs=[58, 62], sampling_freq=sampling_freq)
        signal = butter_filter(filter_2, filter_type="lowpass", order=4,
                               cutoff_freqs=[25], sampling_freq=sampling_freq)
    else:
        signal = raw_signal

    symbols = annotations.symbol
    samples = annotations.sample
    n_annotations = len(symbols)
    record_name = record_path.rsplit(sep="/", maxsplit=1)[-1]

    for idx, (beat_type, r_peak_abs) in enumerate(zip(symbols, samples)):
        # We treat only Normal, VEB, and SVEB signals
        if beat_type not in ("N", "S", "V"):
            continue

        # Annotation positions count from the start of the record, while
        # `signal` starts at `sampfrom`.
        r_peak_pos = r_peak_abs - sampfrom
        # The sample position of pulse start (start of QRS)
        pulse_start_pos = r_peak_pos - int(length_qrs / 2) + 1

        qrs = signal[pulse_start_pos:pulse_start_pos + length_qrs]
        stt = signal[pulse_start_pos + length_qrs + 1:
                     pulse_start_pos + length_qrs + length_stt]

        if qrs.size > 0:
            _, qrs_arcoeffs, _, _, _ = levinson_durbin(qrs, nlags=ar_order_qrs, isacov=False)
        else:
            qrs_arcoeffs = None
        if stt.size > 0:
            _, stt_arcoeffs, _, _, _ = levinson_durbin(stt, nlags=ar_order_stt, isacov=False)
        else:
            stt_arcoeffs = None

        pre_rr_length = samples[idx] - samples[idx - 1] if idx > 0 else None
        post_rr_length = samples[idx + 1] - samples[idx] if idx + 1 < n_annotations else None

        _type = 1 if beat_type == "V" else 0

        beat_list = [("record", record_name),
                     ("type", _type),
                     ("pre-RR", pre_rr_length),
                     ("post-RR", post_rr_length)]
        # An empty window has no coefficients; add nothing for it.
        if qrs_arcoeffs is not None:
            for k, coeff in enumerate(qrs_arcoeffs):
                beat_list.append(("qrs_ar{}".format(k), coeff))
        if stt_arcoeffs is not None:
            for k, coeff in enumerate(stt_arcoeffs):
                beat_list.append(("stt_ar{}".format(k), coeff))

        qrs_stt_rr_list.append(OrderedDict(beat_list))

    return qrs_stt_rr_list
from scipy import signal realbeats = [ 'L', 'R', 'B', 'A', 'a', 'J', 'S', 'V', 'r', 'F', 'e', 'j', 'n', 'E', '/', 'f', 'Q', '?' ] RECORDS = [ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 200, 201, 202, 203, 205, 207, 208, 209, 210, 212, 213, 214, 215, 217, 219, 220, 221, 222, 223, 228, 230, 231, 232, 233, 234 ] for i in RECORDS: try: record = wf.rdsamp("Signals/" + str(i)) annotation = wf.rdann("Signals/" + str(i), 'atr') print(' Sampling frequency used for this record:', record[1].get('fs')) print(' Shape of loaded data array:', record[0].shape) print(' Number of loaded annotations:', len(annotation.num)) data = record[0].transpose( ) # transpose to be done before the signal can be processed using the biosppy cat = np.array(annotation.symbol ) # getting the annotation symbols for each reocrd rate = np.zeros_like(cat, dtype='float') # enumerating through the annotations,if N then normal if else then abnormal for cat_id, cat_val in enumerate(cat): if cat_val == 'N':
def load_example(path):
    """Read a complete WFDB record and its ``atr`` annotations.

    Args:
        path: Record path without extension.

    Returns:
        tuple: the result of ``wfdb.rdsamp`` for the whole record, followed
        by the ``wfdb.rdann`` annotation object for the ``atr`` file.
    """
    return (
        wfdb.rdsamp(path, sampto=None),
        wfdb.rdann(path, 'atr', sampto=None),
    )
# date: June-2020

import numpy as np
import matplotlib.pyplot as plt
import os
import wfdb

# --- Parameters ---
data_dir = '../../data'
in_file = 'rec_1'
rate = 500  # [Hz]
nn = 10  # calculate HRV over nn heart beats

# --- Load the recording and show the first channel ---
os.chdir(data_dir)
sig, fields = wfdb.rdsamp(in_file)
ecg = sig[:, 0]
plt.plot(ecg)

# --- R-detection by a fixed threshold ---
#threshold = plt.ginput(1)
threshold = 0.4

# Rising edges: samples where the signal goes from below to above the threshold.
high = ecg > threshold
onset = np.flatnonzero(np.diff(high.astype(float)) == 1)

# --- Times between two heartbeats ---
dt = 1 / rate
dts = np.diff(onset) * dt
# Accumulators filled while walking every person's recordings.
labels = []
all_segs = []
for i in Person_IDs:
    # Folder names use a two-character ID: single-digit IDs get a leading '0'.
    curr_ID_str = str(i)
    if i < 10:
        curr_ID_str = '0' + curr_ID_str
    curr_foldername = 'Person_' + curr_ID_str
    all_targetnames.append(curr_foldername)
    all_recs = glob.glob(curr_foldername + "/*.dat")
    all_seg_avgs_i = []
    for one_rec in all_recs:
        # One rec has many pulses
        filename = one_rec[:-4]  # strip the '.dat' extension
        sig, fields = wfdb.rdsamp(filename) #, pbdl=0)
        fs = fields['fs']
        sig_origin = sig[:, 0]  # first channel of the record
        # NOTE(review): `ecg.ecg` looks like a peak-detection helper whose
        # third output is the peak positions - confirm against its definition.
        out = ecg.ecg(signal=sig_origin, sampling_rate=fs, show=False)
        peaks = out[2]
        segs = get_segs(sig_origin, peaks, seg_len)
        # seg_len is divided by 1000 here, i.e. treated as milliseconds.
        qual_len = int(round(seg_len / 1000. * fs)) # len of a complete QRS seg
        qual_num = 0
        seg_sum = [0] * qual_len
        # Count the segments that have the full expected length.
        for seg in segs:
            if len(seg) == qual_len:
                qual_num += 1
def segmentation(records, type, class_counter, output_dir=''):
    """Cut out beats of one annotation class and render each as a small image.

    For every record, channel 0 and the ``atr`` annotations are read. Each
    annotation whose symbol equals ``type``, and that is neither the first nor
    the last annotation of its record, yields the 192 samples around it
    (96 before, 96 after). Every complete window is plotted without axes,
    saved as ``fig_<class_counter>_<n>.png``, then re-read as grayscale,
    eroded with a 4x4 kernel, resized to 192x128 and written back.

    Args:
        records: Iterable of WFDB record paths (without extension).
        type: Annotation symbol to extract, e.g. 'N' for normal beats,
            'L' / 'R' for left / right bundle branch block beats, 'A' for
            atrial premature contraction, 'V' for ventricular premature
            contraction, '/' for paced beat, 'E' for ventricular escape beat.
        class_counter: Value embedded in the image file names.
        output_dir: Directory receiving the images; it is created when
            missing. The default ``''`` writes into the current directory.

    Returns:
        list: The extracted windows, each a 1-D array of 192 samples.
    """
    # os.makedirs('') raises, so only create a directory when one was given.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    results = []
    kernel = np.ones((4, 4), np.uint8)
    count = 1

    # Hard-coded signal statistics used to fix the plot's y-range to
    # mean +/- 3 std. A disabled block that used to sit here derived them
    # from channel 0 of `records`.
    mean_v = -0.33859
    std_v = 0.472368
    floor = mean_v - 3 * std_v
    ceil = mean_v + 3 * std_v

    for e in records:
        signals, fields = wfdb.rdsamp(e, channels=[0])
        ann = wfdb.rdann(e, 'atr')
        beats = ann.sample
        last = len(beats) - 1
        # Positions (within the annotation list) of the requested symbol.
        wanted = np.flatnonzero(np.isin(ann.symbol, [type]))

        for j in tqdm(wanted):
            # The first and last annotations of a record are skipped.
            if j == 0 or j == last:
                continue
            start = beats[j] - 96
            if start < 0:
                # A negative start would wrap around to the end of the signal.
                continue
            data = np.array(signals[start:beats[j] + 96, 0])
            # discard missing values
            if data.shape[0] != 192:
                continue

            results.append(data)
            plt.axis([0, 192, floor, ceil])
            plt.plot(data, linewidth=0.5)
            plt.xticks([]), plt.yticks([])
            for spine in plt.gca().spines.values():
                spine.set_visible(False)

            filename = os.path.join(
                output_dir, 'fig_{}_{}'.format(class_counter, count) + '.png')
            plt.savefig(filename)
            plt.close()

            im_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            im_gray = cv2.erode(im_gray, kernel, iterations=1)
            im_gray = cv2.resize(im_gray, (192, 128),
                                 interpolation=cv2.INTER_LANCZOS4)
            cv2.imwrite(filename, im_gray)
            count += 1
    return results
def make_dataset(records, data_path, ds_config, leads_x_rec=[], data_aumentation=1, ds_name='none'):
    """Build a beat / non-beat window dataset from WFDB records and save it.

    First pass (only when ``ds_config['tgt_ratio']`` is NaN): every record is
    read to estimate the ratio of non-beat to beat samples; the median over
    all records becomes the target ratio.

    Second pass: every record is read again, optionally restricted to the
    leads named in ``leads_x_rec``, resampled to ``ds_config['target_fs']``,
    and cut into windows of ``ds_config['width']`` seconds. Window start
    positions are drawn at random inside the ranges returned by
    ``gen_interest_ranges``; non-beat windows get label 0 and beat windows
    label 1. Each window has its per-lead median removed, is scaled, and is
    quantised to int16.

    Whenever ``sys.getsizeof`` of the accumulated array exceeds
    ``ds_config['dataset_max_size']`` the data is saved under
    ``ds_config['dataset_path']`` as ``ds_<ds_name>_part_<k>.npy`` and the
    accumulators are emptied. A final file, plus ``<ds_name>_size.txt``
    listing file names, sizes and feature counts, is written at the end.

    ``ds_config`` keys read here: 'tgt_ratio', 'target_beats', 'width',
    'target_fs', 'heartbeat_tolerance', 'dataset_max_size', 'dataset_path'.

    Returns:
        tuple: ``(all_signals, all_labels, ds_part)``.

    NOTE(review): when more than one part was written, ``all_signals`` and
    ``all_labels`` are reset to empty lists before returning. If the last
    record itself triggered a flush, ``all_signals`` is already a list when
    the "last part" branch reads ``all_signals.shape`` - confirm whether that
    case can occur.
    """
    # NOTE(review): `signals` and `labels` are not used below.
    signals, labels = [], []

    nQRS_QRS_ratio = []
    cant_latidos_total = 0

    # Iterate over the records
    # for this_rec in records:

    # Without an explicit lead selection every record uses all of its leads.
    if len(leads_x_rec) == 0:
        leads_x_rec = ['all'] * len(records)

    start_beat_idx = 0
    # start_beat_idx = ((np.array(records) == 'stdb/315').nonzero())[0][0]
    # start_beat_idx = ((np.array(records) == 'ltafdb/20').nonzero())[0][0]
    # start_beat_idx = ((np.array(records) == 'edb/e0204').nonzero())[0][0]
    # start_beat_idx = ((np.array(records) == 'sddb/49').nonzero())[0][0]

    tgt_ratio = np.nan

    min_cant_latidos = 34e10

    if np.isnan(ds_config['tgt_ratio']):

        for ii in np.arange(start_beat_idx, len(records)):

            this_rec = records[ii]

            print('Procesando:' + this_rec)

            data, field = wf.rdsamp(os.path.join(data_path, this_rec))
            annotations = wf.rdann(os.path.join(data_path, this_rec), 'atr')

            # filter the beats
            beats = get_beats(annotations)

            this_cant_latidos = len(beats)
            min_cant_latidos = np.min([min_cant_latidos, this_cant_latidos])

            cant_latidos_total += this_cant_latidos

            # Window width, half width and tolerance at this record's own rate.
            w_in_samp = my_int(ds_config['width'] * field['fs'])
            hw_in_samp = my_int(ds_config['width'] * field['fs'] / 2)
            tol_in_samp = my_int(ds_config['heartbeat_tolerance'] * field['fs'])
            samp_around_beats = w_in_samp * len(beats)

            # accumulate the ratio of non-beat samples to beat samples so that
            # the training dataset keeps that ratio
            this_ratio = (field['sig_len'] - samp_around_beats) / samp_around_beats
            nQRS_QRS_ratio.append(this_ratio)

        # target proportion ratio
        tgt_ratio = np.median(nQRS_QRS_ratio)

    else:
        tgt_ratio = ds_config['tgt_ratio']

    if not ds_config['target_beats'] is None:

        print('*********************************************')
        print('Construyendo para ' + str(ds_config['target_beats']) + ' latidos por paciente.')
        print('El ratio no_latido/latido es: {:3.3f}'.format(tgt_ratio))
        print('*********************************************')

        # Warn when some record has fewer beats than requested. The minimum is
        # only measured in the first pass above.
        if ds_config['target_beats'] > min_cant_latidos:
            print('*********************************************')
            print('OJALDRE! Hay registros con menos latidos: ' + str(min_cant_latidos) + ' latidos')
            print('*********************************************')

    start_beat_idx = 0

    all_signals = []
    ds_part = 1
    cant_total_samples = 0
    ds_parts_fn = []
    ds_parts_size = []
    ds_parts_features = []

    # Window width, half width and tolerance at the target sampling rate.
    w_in_samp = my_int(ds_config['width'] * ds_config['target_fs'])
    hw_in_samp = my_int(ds_config['width'] * ds_config['target_fs'] / 2)
    tol_in_samp = my_int(ds_config['heartbeat_tolerance'] * ds_config['target_fs'])

    # for this_rec in records:
    for ii in np.arange(start_beat_idx, len(records)):

        this_rec = records[ii]
        this_leads_idx = leads_x_rec[ii]

        print(
            str(my_int(ii / len(records) * 100)) + '% Procesando:' + this_rec)

        data, field = wf.rdsamp(os.path.join(data_path, this_rec))
        annotations = wf.rdann(os.path.join(data_path, this_rec), 'atr')

        if this_leads_idx != 'all':

            # Map the requested lead name to its column index in the record.
            [_, this_leads_idx, _] = np.intersect1d(field['sig_name'],
                                                    this_leads_idx.strip(),
                                                    assume_unique=True,
                                                    return_indices=True)

            if len(this_leads_idx) > 0:
                data = data[:, this_leads_idx]
                field['n_sig'] = len(this_leads_idx)

        pq_ratio = ds_config['target_fs'] / field['fs']
        resample_frac = Fraction(pq_ratio).limit_denominator(20)
        # recompute the ratio actually applied
        pq_ratio = resample_frac.numerator / resample_frac.denominator
        data = sig.resample_poly(data, resample_frac.numerator,
                                 resample_frac.denominator)

        # filter the beats
        beats = get_beats(annotations)

        # resample references
        beats = np.round(beats * pq_ratio)

        this_cant_beats = len(beats)

        # build the time references used to generate the input vectors
        no_qRS_ranges, qRS_ranges = gen_interest_ranges(
            start_end=[0, np.round(field['sig_len'] * pq_ratio)],
            references=beats,
            width=w_in_samp)

        # Keep only a random subset of `target_beats` beat ranges when the
        # record has at least that many beats.
        if not ds_config['target_beats'] is None and ds_config[
                'target_beats'] <= this_cant_beats:
            this_beats_idx = np.sort(
                np.random.choice(np.arange(len(qRS_ranges)),
                                 ds_config['target_beats'],
                                 replace=False))
            qRS_ranges = np.vstack(qRS_ranges)
            qRS_ranges = qRS_ranges[this_beats_idx, :]
            qRS_ranges = qRS_ranges.tolist()

        this_cant_no_beats = my_ceil(len(qRS_ranges) * tgt_ratio)

        if this_cant_no_beats < len(no_qRS_ranges):
            # keep only a random subset
            this_beats_idx = np.sort(
                np.random.choice(np.arange(len(no_qRS_ranges)),
                                 this_cant_no_beats,
                                 replace=False))
            no_qRS_ranges = np.vstack(no_qRS_ranges)
            no_qRS_ranges = no_qRS_ranges[this_beats_idx, :]
            no_qRS_ranges = no_qRS_ranges.tolist()
        else:
            # increase the number of non-beat segments to match the desired ratio
            segments_repeat = my_int(
                np.abs(
                    len(no_qRS_ranges) -
                    np.ceil(len(qRS_ranges) * tgt_ratio)))
            no_qRS_ranges += [
                no_qRS_ranges[np.random.randint(len(no_qRS_ranges))]
                for _ in range(segments_repeat)
            ]

        # generate the augmented start positions according to data_aumentation
        starts = []
        starts += [
            (np.random.randint(this_start_end[0] - hw_in_samp,
                               this_start_end[1] - hw_in_samp,
                               size=data_aumentation,
                               dtype='int')).reshape(data_aumentation, 1)
            for this_start_end in no_qRS_ranges
        ]
        starts += [
            (np.random.randint(this_start_end[0] - tol_in_samp,
                               this_start_end[0] + tol_in_samp,
                               size=data_aumentation,
                               dtype='int')).reshape(data_aumentation, 1)
            for this_start_end in qRS_ranges
        ]
        # starts += [ this_start_end[0] for this_start_end in qRS_ranges ]

        # 0: No QRS - 1: QRS
        this_lab = np.concatenate(
            (np.zeros((my_int(
                len(no_qRS_ranges) * field['n_sig'] * data_aumentation), 1)),
             np.ones((my_int(
                 len(qRS_ranges) * field['n_sig'] * data_aumentation), 1))),
            axis=0)

        # unbias and normalize
        bScaleRecording = True
        # bScaleRecording = False

        if bScaleRecording:
            # this_scale = mad(data, axis=0).reshape(field['n_sig'], 1 )

            # using the beats: per-lead median, over all beats, of the
            # maximum absolute value found around each beat
            this_scale = (np.nanmedian(np.vstack([
                np.max(np.abs(
                    data[my_int(np.max([0, this_beat - w_in_samp])):my_int(
                        np.min([
                            field['sig_len'] * pq_ratio, this_beat + w_in_samp
                        ])), :]),
                       axis=0) for this_beat in beats
            ]),
                                       axis=0)).reshape(field['n_sig'], 1)

            bAux = np.bitwise_or(this_scale == 0, np.isnan(this_scale))
            if np.any(bAux):
                # avoid scaling in case 0 or NaN
                this_scale[bAux] = 1

        starts = np.vstack(starts)

        the_sigs = []

        for this_start in starts:

            # try:
            # One window, transposed so that each row is one lead.
            this_sig = np.transpose(
                data[my_int(this_start):my_int(this_start + w_in_samp), :])

            # unbias and normalize
            this_sig = this_sig - np.nanmedian(this_sig, axis=1, keepdims=True)

            if not (bScaleRecording):
                # Per-window scaling, used only when per-record scaling is off.
                this_scale = mad(this_sig, center=0,
                                 axis=1).reshape(this_sig.shape[0], 1)
                bAux = np.bitwise_or(this_scale == 0, np.isnan(this_scale))
                if np.any(bAux):
                    # avoid scaling in case 0 or NaN
                    this_scale[bAux] = 1

            # add an small dither
            # this_sig = this_sig * 1/this_scale + 1/500 * np.random.randn(this_sig.shape[0], this_sig.shape[1])

            this_sig = this_sig * 1 / this_scale

            # Quantise to int16: scale by half of the int16 range, then clip.
            this_sig = (np.clip(np.round(this_sig * (2**15 - 1) * 0.5),
                                -(2**15 - 1), 2**15 - 1)).astype('int16')

            the_sigs += [this_sig]

            # except Exception:
            #
            #     a = 0

        if len(all_signals) == 0:
            all_labels = this_lab
            all_signals = np.vstack(the_sigs)
        else:
            all_labels = np.concatenate((all_labels, this_lab))
            all_signals = np.concatenate((all_signals, np.vstack(the_sigs)))

        # Flush the accumulated data to disk once it grows past the limit.
        if sys.getsizeof(all_signals) > ds_config['dataset_max_size']:

            part_fn = 'ds_' + ds_name + '_part_' + str(ds_part) + '.npy'

            ds_parts_fn += [part_fn]
            ds_parts_size += [all_signals.shape[0]]
            ds_parts_features += [all_signals.shape[1]]
            cant_total_samples += all_signals.shape[0]

            np.save(os.path.join(ds_config['dataset_path'], part_fn), {
                'signals': all_signals,
                'labels': all_labels
            })
            ds_part += 1
            all_signals = []
            all_labels = []

    if ds_part > 1:
        # last part
        # NOTE(review): `all_signals` is a plain list here if the final record
        # triggered the flush above; `.shape` would then fail - confirm.
        part_fn = 'ds_' + ds_name + '_part_' + str(ds_part) + '.npy'

        ds_parts_fn += [part_fn]
        ds_parts_size += [all_signals.shape[0]]
        ds_parts_features += [all_signals.shape[1]]

        np.save(
            os.path.join(ds_config['dataset_path'], part_fn), {
                'signals': all_signals,
                'labels': all_labels,
                'cant_total_samples': all_signals.shape[0]
            })
        all_signals = []
        all_labels = []

        aux_df = DataFrame({
            'filename': ds_parts_fn,
            'ds_size': ds_parts_size,
            'ds_features': ds_parts_features,
        })

    else:
        part_fn = 'ds_' + ds_name + '.npy'
        # unique part
        np.save(
            os.path.join(ds_config['dataset_path'], part_fn), {
                'signals': all_signals,
                'labels': all_labels,
                'cant_total_samples': all_signals.shape[0]
            })

        aux_df = DataFrame({
            'filename': [part_fn],
            'ds_size': [all_signals.shape[0]],
            'ds_features': [all_signals.shape[1]]
        })

    # Index file listing every written part with its size and feature count.
    aux_df.to_csv(os.path.join(ds_config['dataset_path'],
                               ds_name + '_size.txt'),
                  sep=',',
                  header=False,
                  index=False)

    return all_signals, all_labels, ds_part
# Test data is taken from:
# https://physionet.org/physiobank/database/mitdb/
# 205
# https://physionet.org/physiobank/database/mitdb/205.dat
from scipy.signal import hilbert, cheby1, filtfilt
import numpy as np
from bokeh.plotting import figure, output_file, show
import wfdb

# =================================================================
# Parameters
# =================================================================
# Get the sample data
f205 = wfdb.rdsamp('mitdb/205', pb_dir='mitdb')
# EKG array (first channel)
EKG = f205[0][:, 0]
# Define the sampling rate
sampling_rate = f205[1]['fs']
# Time of every sample in seconds: sample index divided by the sampling rate,
# so the axis has exactly one entry per EKG sample.
time_sec = np.arange(len(EKG)) / sampling_rate

# =================================================================
# Define Filter Functions
# There are two filters in this procedure
# 1. Chebyshev Type I filter
# 2. Running mean filter
# =================================================================
4 - Axis Shift (shift) - SST; ''' #signal = ... #columns = ... FILENAME = sys.argv[1] columns = pd.DataFrame.from_csv(FILENAME + ".csv") normalEndIndex = columns['1'].iloc[0] - 1300 normaldf = pd.DataFrame({'0': ['normal'], '1': [0], '2': [normalEndIndex]}) columns = columns.append(normaldf) labelDict = {"st": 0, "rtst": 1, "sccst": 2, "sst": 3, "normal": 4} numSecondsPerChunk = 5 deltaFreq = 5 signals, fields = wfdb.rdsamp(FILENAME) signals = signals[:, 0] freq = fields['fs'] lengthOfChunk = numSecondsPerChunk * freq data = pd.DataFrame({"Signal": [], "Label": []}) for index, row in columns.iterrows(): label = row[0] begRange = row[1] endRange = row[2] for j in range(begRange, endRange, lengthOfChunk): x = np.array([signals[y] for y in range(j, j + lengthOfChunk)]) if len(x) == lengthOfChunk: x = x.reshape(len(x) // deltaFreq, deltaFreq).mean(1).flatten() df2 = pd.DataFrame({"Signal": [x], "Label": [label]})
def preprocess_data(path, save_path, prefix):
    """Cut every record listed in ``<path>/RECORDS`` into labelled windows.

    A record is processed only when at least one of its lead names appears in
    ``valid_lead``; in that case every lead of the record is windowed.
    Windows are two seconds long, resampled with ``resample_unequal`` to the
    module-level ``fs_out``, and skipped when their mean lies outside
    (-100, 100) or their standard deviation is zero. Each kept window is
    labelled, via ``get_label_map``, with the unique beat symbols and rhythm
    notes that fall inside it. Windows labelled 'VF/VT' advance by 0.1 s (for
    at most 151 windows per record) to oversample that class; all other
    windows advance by two seconds.

    Three arrays are written to ``save_path``: ``<prefix>_pid.npy``
    ("<record>+<lead index>" per window), ``<prefix>_data.npy`` and
    ``<prefix>_label.npy``.

    Args:
        path: Directory holding the WFDB records and the ``RECORDS`` file.
        save_path: Directory receiving the ``.npy`` files.
        prefix: File-name prefix of the outputs.
    """
    valid_lead = ['MLII', 'ECG', 'V5', 'V2', 'V1', 'ECG1', 'ECG2']  # extract all similar leads
    t = 2
    window_size_t = 2  # second
    stride_t = 2  # second

    all_pid = []
    all_data = []
    all_label = []

    with open(os.path.join(path, 'RECORDS'), 'r') as fin:
        all_record_name = fin.read().strip().split('\n')

    for record_name in all_record_name:
        cnt = 0
        try:
            tmp_ann_res = wfdb.rdann(path + '/' + record_name, 'atr').__dict__
            tmp_data_res = wfdb.rdsamp(path + '/' + record_name)
        except Exception:
            # Best effort: skip records that cannot be read, but let
            # KeyboardInterrupt / SystemExit through.
            print('read data failed')
            continue

        fs = tmp_data_res[1]['fs']
        # Integer sample counts, so they are safe to use as slice indices
        # even when the header reports a float sampling rate.
        window_size = int(fs * window_size_t)
        stride = int(fs * stride_t)
        skip_step = int(fs)
        vf_step = int(0.1 * fs)

        lead_in_data = tmp_data_res[1]['sig_name']
        print(lead_in_data)

        my_lead_all = [tmp_lead for tmp_lead in valid_lead if tmp_lead in lead_in_data]
        if len(my_lead_all) == 0:
            print('lead in data: [{0}]. no valid lead in {1}'.format(lead_in_data, record_name))
            continue

        # The annotations are the same for every lead, so expand them once.
        idx_list = tmp_ann_res['sample']
        label_list = np.array(tmp_ann_res['symbol'])
        aux_list = np.array([i.strip('\x00') for i in tmp_ann_res['aux_note']])
        full_aux_list = [''] * tmp_data_res[1]['sig_len']  # expand aux to full length
        for i in range(len(aux_list)):
            full_aux_list[idx_list[i]] = aux_list[i]  # copy old aux
            if label_list[i] in ['[', '!']:
                full_aux_list[idx_list[i]] = '(VF'  # copy VF start from beat labels
            if label_list[i] in [']']:
                full_aux_list[idx_list[i]] = '(N'  # copy VF end from beat labels
        # Forward-fill: every sample inherits the most recent rhythm note.
        for i in range(1, len(full_aux_list)):
            if full_aux_list[i] == '':
                full_aux_list[i] = full_aux_list[i - 1]

        for my_lead in range(len(lead_in_data)):
            pp_pid = []
            pp_data = []
            pp_label = []

            tmp_data = tmp_data_res[0][:, my_lead]

            idx_start = 0
            while idx_start < len(tmp_data) - window_size:
                idx_end = idx_start + window_size

                tmpdata = resample_unequal(tmp_data[idx_start:idx_end], fs, fs_out, t)
                # Skip implausible (huge offset) or flat windows.
                if not -100 < np.mean(tmpdata) < 100 or np.std(tmpdata) == 0:
                    idx_start += skip_step
                    continue

                pp_pid.append("{}".format(record_name + '+' + str(my_lead)))
                pp_data.append(tmpdata)

                tmp_label_beat = label_list[np.logical_and(idx_list >= idx_start, idx_list <= idx_end)]
                tmp_label_rhythm = full_aux_list[idx_start:idx_end]  # be careful
                tmp_label = list(np.unique(tmp_label_beat)) + list(np.unique(tmp_label_rhythm))
                tmp_label = get_label_map(tmp_label)

                # Oversample VF/VT with a short step, capped per record.
                if 'VF/VT' in tmp_label and cnt <= 150:
                    idx_start += vf_step
                    cnt += 1
                else:
                    idx_start += stride

                pp_label.append(tmp_label)

            all_pid.extend(pp_pid)
            all_data.extend(pp_data)
            all_label.extend(pp_label)

            print('record_name:{}, len:{}, lead:{}, fs:{}, count:{}, labels:{}'.format(record_name, tmp_data_res[1]['sig_len'], my_lead, fs, len(pp_data), Counter(flatten(pp_label))))

    all_pid = np.array(all_pid)
    all_data = np.array(all_data)
    all_label = np.array(all_label)
    print(all_pid.shape, all_data.shape)
    print(Counter(flatten(all_label)))
    print(Counter([tuple(_) for _ in all_label]))

    np.save(os.path.join(save_path, '{}_pid.npy'.format(prefix)), all_pid)
    np.save(os.path.join(save_path, '{}_data.npy'.format(prefix)), all_data)
    np.save(os.path.join(save_path, '{}_label.npy'.format(prefix)), all_label)
    print('{} done'.format(prefix))
import wfdb

dbName = 'wrist'
recName = 's1_walk'

# Read the first channel of the record, starting at sample 1000.
sig, fields = wfdb.rdsamp('/'.join((dbName, recName)), sampfrom=1000, channels=[0])

# Read the whole record as an object and dump its attributes.
record = wfdb.rdrecord('/'.join((dbName, recName)))
print(vars(record))

# Write the samples read above as a new single-channel record through the
# gateway wrsamp function, passing every header field explicitly.
wfdb.wrsamp(
    'ecg-record',
    fs=256,
    units=['mV'],
    sig_name=['chest_ecg'],
    p_signal=sig,
    fmt=['16'],
)

# Read the new record back and plot it.
record = wfdb.rdrecord('ecg-record')
wfdb.plot_wfdb(record)
high = highcut / nyq b, a = butter(order, [low, high], btype='band') y = lfilter(b, a, data) y=y[::-1] y=y+600 return y path='/home/reetu/www.physionet.org/physiobank/database/mimic2wdb/matched/s10842/' lst=[] for filename in os.listdir(path): lst.append(filename) lst.sort() for i in range(1,len(lst),2): str=lst[i] sig,fld=wfdb.srdsamp(path+(str[0:len(str)-4])) rec=wfdb.rdsamp(path+(str[0:len(str)-4])) #print(sig) #print(fld) sig_name=fld['signame'] flg=1 for k in range(len(sig_name)): if sig_name[k]== 'PLETH': flg=0; print(k) break; if flg==0: sz=list(sig.shape) print(sz) print(str[0:len(str)-4])
a = 0
# We extract 120 samples to the left and right from the beat label
half_qrs = 120
# Search in each file name of the folder 'mitdb'
for filename in os.listdir('mitdb'):
    # DEBUG: To avoid reading every file
    #for i in range (1): # DEBUG: To avoid reading every file
    #filename = '100.dat' # DEBUG: To avoid reading every file
    # If file name is a .dat and is not one of (102, 104, 107 or 217) recordings
    if filename.endswith(".dat") and not filename.startswith(
            ('102', '104', '107', '217')):
        # Record path without the '.dat' suffix. str.strip('.dat') would
        # remove any of the characters '.', 'd', 'a', 't' from both ends of
        # the name rather than the suffix, so slice the suffix off instead.
        record_path = 'mitdb/' + filename[:-len('.dat')]
        # Read annotation
        ann = wfdb.rdann(record_path, 'atr')
        # Read signal file
        record = wfdb.rdsamp(record_path)
        # The signal is in the position 0 of 'record'
        data = record[0]
        # Prepare containers
        signals, classes = [], []
        firstch_signals = []
        firstch_class = []
        # Beat extraction
        for it, beat in enumerate(ann.symbol):
            if beat == 'N': # in good_beats:
                a = a + 1
# Build the next record name from the characters in `new`, joining them with
# the letter 'f' and incrementing the number stored in new[2].
# NOTE(review): assumes `new` is a list of single characters - confirm
# against the code that creates it.
if int(new[2])<9:
    # Incremented value is a single digit: insert a '0' before it.
    string_2 = new[:2] + list(str(0))
    string_3 = list("f".join(str(e) for e in string_2[:3])) + list(str(int(new[2])+1))
    string = "".join(str(e) for e in string_3)
else:
    # Incremented value has two digits: use them as the last two characters.
    string_2 = new[:2] + list(str(int(new[2])+1))
    string_3 = list("f".join(str(e) for e in string_2[:3])) + list(string_2[3])
    string = "".join(str(e) for e in string_3)
#print(string)
# HPF
for j in range(500):
    # NOTE(review): elsewhere in this file wfdb.rdsamp is unpacked as a
    # (signal, fields) tuple; `.p_signals` on its result only works with an
    # older wfdb API - confirm which version this targets.
    record = wfdb.rdsamp(string,sampfrom=x,sampto=y)
    sig = record.p_signals
    sig_1 = butter_highpass_filter(sig, cutoff, fs, order)
    n = len(sig) # total number of samples
    T = n/fs  # duration of the read window, from the sample count and fs
    t = np.linspace(0, T, n, endpoint=False)
    data=[]
    data_1=[]
    # Copy the first 1024 samples of the first channel into a plain list.
    for k in range(1024):
        data.append(sig[k][0])
import wfdb
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter, freqz
import scipy

# Input time settings
#t0 = int(input("Start time?(min): ") * 360 * 60)
#tf = int(input("End time?(min): ") * 360 * 60)
t0 = 0
tf = int(0.1 * 360 * 60)

# Import ECG signal and annotations.
# wfdb.rdsamp returns a (signal, fields) tuple; the record object with the
# physical signal attribute comes from wfdb.rdrecord.
ecg_signal = wfdb.rdrecord("/Users/WoochanH/python/ecgproject/sampledata/101",
                           sampfrom=t0, sampto=tf, channels=[0])
ecg_ann = wfdb.rdann("/Users/WoochanH/python/ecgproject/sampledata/101", "atr",
                     sampfrom=t0, sampto=tf)
print(dir(ecg_signal))
print(np.shape(ecg_signal.p_signal))

# Import EMG signal
emg_signal = wfdb.rdrecord("/Users/WoochanH/python/ecgproject/sampledata/emg_healthy",
                           sampfrom=t0, sampto=tf, channels=[0])
print(dir(emg_signal))
print(np.shape(emg_signal.p_signal))


# Reshape analogue data = newraw
def Reshape(signal_input, outfunction):
    """Append the first channel of a record's physical signal to a list.

    Args:
        signal_input: Record object exposing a 2-D ``p_signal`` array.
        outfunction: List that receives one float per sample.

    Returns:
        list: ``outfunction`` itself, extended in place.
    """
    outfunction.extend(float(row[0]) for row in signal_input.p_signal)
    return outfunction
# Use a large default font for every figure.
plt.rcParams.update({'font.size': 22})
from scipy import signal
import numpy as np
import os
import wfdb
import heartpy as hp
from DWT_ECG import r_isolate_wavelet
from scipy.interpolate import UnivariateSpline, interp1d
from scipy.signal import welch, periodogram

#get signal- this is just a sample record from the MIT database
os.chdir("/Users/miagiandinoto/Desktop/College/BMED 2250/phase 2 code/Data118")
# First channel only, samples 0 to 110000.
clean, fields = wfdb.rdsamp('118', channels=[0], sampfrom=0, sampto=110000)
clean = clean.flatten()
fs = fields.get('fs')
xclean = r_isolate_wavelet(clean, fs, len(clean))
# Peaks must be at least 200 samples apart and have a prominence of at least
# half of the maximum of the unprocessed signal.
peaks, _ = signal.find_peaks(xclean, prominence=0.5 * max(clean), distance=200)
# Signal values at the detected peak positions.
ybeat = clean[peaks]

#function takes in the list of detected rpeaks, list of where each peak occurs in the signal(ybeat)
#ybeat should just be signal[peaks]
#fs is sampling frequency
#method can be either 'fft' or 'periodogram'
def get_hrv(peaks, ybeat, fs, method):
    #create empty dicts 'wd' and 'measures'.
    wd = {}
    measures = {}
# data_unhealthy = [] # for file in files_unhealthy: # data_v4, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_1)]) # data_v5, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_2)]) # data = [data_v4.flatten(), data_v5.flatten()] # data_unhealthy.append(data) # data_healthy = [] # for file in files_healthy: # data_v4, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_1)]) # data_v5, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_2)]) # data = [data_v4.flatten(), data_v5.flatten()] # data_healthy.append(data) data_healthy_train = [] for file in healthy_train: data_v4, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_1)]) data = [data_v4.flatten()] data_healthy_train.append(data) data_healthy_val = [] for file in healthy_val: data_v4, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_1)]) data = [data_v4.flatten()] data_healthy_val.append(data) data_unhealthy_train = [] for file in unhealthy_train: data_v4, _ = wfdb.rdsamp("ptbdb_data/" + file[:-1], channel_names=[str(channel_1)]) data = [data_v4.flatten()] data_unhealthy_train.append(data) data_unhealthy_val = []
def test_5d(self):
    """
    Physical samples of the fixed-layout multi-segment record v102s, read up
    to sample 75000, must equal the stored target output after rounding to
    8 decimals.
    """
    # wfdb.rdsamp returns a (signal, fields) tuple; the record object with
    # the physical signal attribute comes from wfdb.rdrecord.
    # NOTE(review): other tests in this file read from 'sample-data/' and
    # 'tests/target-output/'; confirm these directory names still exist.
    record = wfdb.rdrecord('sampledata/multisegment/fixed1/v102s',
                           sampto=75000)
    siground = np.round(record.p_signal, decimals=8)
    targetsig = np.genfromtxt('tests/targetoutputdata/target5d')

    np.testing.assert_equal(siground, targetsig)