def load(self, backup_path=None, num_records=None, checkpoint_interval=100):
    """
    Populate ``self.samples`` and ``self.labels``, using a backup file if one exists.

    When ``backup_path`` is given and already contains the backup file for
    this dataset, the tensors are read from it and nothing else happens.
    Otherwise the records are read with ``read_records``, every sample is
    split with ``split_sample``, each resulting signal is normalized (and
    passed through ``self.to_wavelet`` when that is set), and the stacked
    result is stored on the instance and, if ``backup_path`` is given,
    written to the backup file.

    Parameters
    ----------
    backup_path : str or None
        Directory holding the backup file. It is created when missing.
        ``None`` disables both reading and writing the backup.
    num_records : int or None
        Forwarded unchanged to ``read_records``. It is ignored when the
        data comes from an existing backup file.
    checkpoint_interval : int
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    None
        The results are stored in ``self.samples`` and ``self.labels``.
    """
    samples_per_second = self.samples_per_second
    total_sample_size_seconds = self.sample_size_seconds * self.seq_len
    to_wavelet = self.to_wavelet

    file_path = None
    if backup_path is not None:
        # Transformed and raw datasets are cached under different names.
        name_template = '{}.pt' if to_wavelet is None else '{}_transformed.pt'
        file_path = os.path.join(backup_path,
                                 name_template.format(self.dataset_name))
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(backup_path, exist_ok=True)

    if file_path is not None and os.path.isfile(file_path):
        # NOTE: torch.load deserializes the file; only point backup_path at
        # locations whose content is trusted.
        data = torch.load(file_path)
        print('Loaded from backup')
        self.samples, self.labels = data['samples'], data['labels']
        return

    print('Reading records. sample size={}, frequency={}'.format(
        total_sample_size_seconds, samples_per_second))
    data, labels = read_records(self.dataset_name,
                                self.data_path,
                                sample_size_seconds=total_sample_size_seconds,
                                samples_per_second=samples_per_second,
                                num_records=num_records)
    labels = torch.tensor(labels)
    print(labels.shape)
    print(len(data))

    data = [split_sample(sample, self.sample_size_seconds) for sample in data]
    print('Preparing {} samples'.format(len(data)))

    transformed_data = []
    # Iterating the list directly lets tqdm know the total and show progress.
    for sample in tqdm(data, desc='Preprocessing examples'):
        wavelets = []
        for signal in sample:
            signal = wfdb_processing.normalize_bound(signal.numpy())
            sw = signal if to_wavelet is None else to_wavelet(signal)
            wavelets.append(torch.tensor(sw))
        transformed_data.append(torch.stack(wavelets))
    transformed_data = torch.stack(transformed_data)

    if backup_path is not None:
        torch.save({
            'samples': transformed_data,
            'labels': labels
        }, file_path)

    self.samples, self.labels = transformed_data, labels
def test_normalize_bound(self):
    """normalize_bound must keep the sample count and stay inside [lb, ub]."""
    lower, upper = -5, 15
    signals, _ = wfdb.rdsamp('sample-data/100')
    first_channel = signals[:, 0]

    scaled = processing.normalize_bound(first_channel, lower, upper)

    # Same number of samples as the record that was read.
    assert scaled.shape[0] == signals.shape[0]
    # Every value lies within the requested bounds.
    assert np.min(scaled) >= lower
    assert np.max(scaled) <= upper
def tsaug_generator(X_all, y_all, batch_size):
    """
    Endlessly yield augmented time-series batches with sequence labels.

    Every batch entry is built by drawing one random sample, flattening its
    signal and labels, running both through ``my_augmenter.augment`` and
    re-normalizing the augmented signal to (-1, 1).

    Parameters
    ----------
    X_all : 3D numpy array
        Signals, documented by the author as (N, seqlen, features=1).
    y_all : 3D numpy array
        Labels, documented by the author as (N, seqlen, classes=1).
    batch_size : int
        Number of training examples in the batch.

    Yields
    ------
    (X, y) : tuple
        Signals and integer labels, each reshaped to
        (batch, sequence length, 1).
    """

    def _draw_augmented_pair():
        # One random sample, augmented; only the signal is normalized.
        pick = np.random.randint(0, X_all.shape[0])
        flat_signal = X_all[pick].flatten()
        flat_labels = y_all[pick].flatten()
        aug_signal, aug_labels = my_augmenter.augment(flat_signal, flat_labels)
        return normalize_bound(aug_signal, lb=-1, ub=1), aug_labels

    while True:
        batch_signals, batch_labels = [], []
        while len(batch_signals) < batch_size:
            one_signal, one_labels = _draw_augmented_pair()
            batch_signals.append(one_signal)
            batch_labels.append(one_labels)

        features = np.asarray(batch_signals)
        targets = np.asarray(batch_labels)

        # Add the trailing channel axis expected by the consumer.
        features = features.reshape(features.shape[0], features.shape[1], 1)
        targets = targets.reshape(targets.shape[0], targets.shape[1], 1)
        targets = targets.astype(int)

        yield (features, targets)
def ecg_generator(name, signals, wgn, ma, bw, win_size, batch_size):
    """
    Build one batch of noisy ECG windows, plot one example and save the batch.

    Despite its name this function is not a Python generator: it builds a
    single batch and returns it.

    Every instance of the batch is composed as follows:
    1. Randomly select one ECG signal from the given list of signals
    2. Randomly select one window of ``win_size`` samples from that signal
       and keep it unmodified as the "original" window
    3. Normalize the window to (-1, 1)
    4. Get noise from ``get_noise``, normalize it to (-1, 1), add it to the
       window and normalize the sum to (-1, 1) again

    After the batch is complete, one randomly chosen example is plotted
    (original, noised, added noise) with ``plt.show()``, and both arrays are
    passed to ``serialization``.

    Parameters
    ----------
    name : str
        Used in the plot titles and the serialization names, and forwarded
        to ``get_noise``.
    signals : list
        List of ECG signals (arrays; every signal must be longer than
        ``win_size``, see the note in the body)
    wgn : object
        Forwarded unchanged to ``get_noise`` (presumably white Gaussian
        noise settings - confirm against ``get_noise``)
    ma : array
        Muscle artifact signal
    bw : array
        Baseline wander signal
    win_size : int
        Number of time steps in each window
    batch_size : int
        Number of windows in the batch

    Returns
    -------
    (x, section) : tuple
        ``x`` holds the noised windows and ``section`` the original windows,
        both reshaped to (batch, window length, 1).
    """
    print('processing')

    noisy_windows = []
    clean_windows = []
    noise_list = []

    while len(noisy_windows) < batch_size:
        random_sig_idx = np.random.randint(0, len(signals))
        random_sig = signals[random_sig_idx]

        # Select one window. np.random.randint excludes its upper bound, so
        # a signal of exactly win_size samples raises here.
        beg = np.random.randint(random_sig.shape[0] - win_size)
        end = beg + win_size
        clean_windows.append(random_sig[beg:end])

        # Select data for window and normalize it (-1, 1)
        data_win = normalize_bound(random_sig[beg:end], lb=-1, ub=1)

        # Add noise into data window and normalize it again
        added_noise = get_noise(name, wgn, ma, bw, win_size)
        added_noise = normalize_bound(added_noise, lb=-1, ub=1)
        noise_list.append(added_noise)
        data_win = normalize_bound(data_win + added_noise, lb=-1, ub=1)
        noisy_windows.append(data_win)

    section = np.asarray(clean_windows)
    section = section.reshape(section.shape[0], section.shape[1], 1)

    x = np.asarray(noisy_windows)
    print(x.shape)
    x = x.reshape(x.shape[0], x.shape[1], 1)

    # Show one random example of the batch: original, noised, noise alone.
    example_idx = np.random.randint(0, len(x))
    plt.subplot(311)
    plt.plot(section[example_idx])
    plt.title('original ' + name + ' ecg')
    plt.subplot(312)
    plt.plot(x[example_idx])
    plt.title('noised ' + name + ' ecg')
    plt.subplot(313)
    plt.title('added noise')
    plt.plot(noise_list[example_idx])
    plt.show()

    print('shape:', x.shape)
    serialization(name + '_original_ecg', section)
    serialization(name + '_noised_ecg', x)
    return x, section
# Number of samples that are plotted and fed to the model.
_UTAMA_WINDOW = 2700


def _utama_read_signal(path_data, ext):
    """Return the raw 1-D signal stored in an 'xml', 'dat' or 'mat' file."""
    if ext == 'xml':
        with open(path_data) as fopen:
            data = xmltodict.parse(fopen.read(), encoding='latin-1')
        raw = data['CardioXP']['StudyInfo']['WaveData'][1]['Data']
        return np.array(raw.split(' '), dtype=int)
    if ext == 'dat':
        # NOTE(review): wfdb documents record names as extension-less;
        # confirm that passing the '.dat' path works with the wfdb version
        # in use.
        return wfdb.rdrecord(path_data).p_signal[:, 0]
    if ext == 'mat':
        return scipy.io.loadmat(path_data)['val'][0]
    sys.exit("Not A Valid Data")


def _utama_save_plot(samples, out_path):
    """Plot the first 2700 samples and write the figure to out_path."""
    fig = plt.figure(figsize=(15, 10))
    try:
        plt.plot(range(_UTAMA_WINDOW), samples[:_UTAMA_WINDOW])
        plt.savefig(out_path)
    finally:
        # Without an explicit close every call leaves a figure alive.
        plt.close(fig)


def utama(filename):
    """
    Classify one ECG file and save before/after preprocessing plots.

    The signal is read from an 'xml', 'dat' or 'mat' file, plotted,
    normalized to (0, 1), passed through ``denoising``, cut to the first
    2700 samples, plotted again and classified with the Keras model stored
    in "Best Model 3 Classes.h5".

    Reads the module-level ``timenow`` string, which becomes part of the
    plot file names and of the returned identifier.

    Parameters
    ----------
    filename : str
        Path of the input file. The file name is taken from the fifth
        '/'-separated component, so the path must have that depth
        (presumably the upload folder layout - verify against the caller).

    Returns
    -------
    (label, identifier) : tuple of str
        ``label`` is 'Normal', 'Atrial Fibrilation' or 'Non AF';
        ``identifier`` is ``namefile + '-' + timenow``, the common prefix of
        the two saved plots.

    Raises
    ------
    SystemExit
        If the extension is not supported or the signal has fewer than 2700
        samples.
    """
    path_data = filename
    path_model = "Best Model 3 Classes.h5"
    ext = path_data.split(".")[-1]
    namefile = path_data.split('/')[4].split('.')[0]
    print(filename, ext, namefile)

    raw_signal = _utama_read_signal(path_data, ext)
    # Checked before plotting: plotting a shorter signal against
    # range(2700) would fail with an unrelated matplotlib error.
    if len(raw_signal) < _UTAMA_WINDOW:
        sys.exit("Signal length is not sufficient")

    identifier = namefile + '-' + timenow
    _utama_save_plot(raw_signal, './app/static/AF/' + identifier + '-bef.png')

    signal = normalize_bound(raw_signal, lb=0, ub=1)
    signal = denoising(signal)
    signal = signal[0:_UTAMA_WINDOW]
    # denoising() is opaque here, so re-check the length it returned.
    if len(signal) < _UTAMA_WINDOW:
        sys.exit("Signal length is not sufficient")
    _utama_save_plot(signal, './app/static/AF/' + identifier + '-aft.png')

    signal = np.reshape(signal, (1, _UTAMA_WINDOW, 1))
    model = load_model(path_model)
    predicted_class = model.predict_classes(signal)[0]
    clear_session()

    if predicted_class == 0:
        label = 'Normal'
    elif predicted_class == 1:
        label = 'Atrial Fibrilation'
    else:
        label = 'Non AF'
    print(label)
    return label, identifier
# Annotation symbols whose positions are not turned into beats.
_SKIPPED_SYMBOLS = {'~', '|', '+', 'B', 'F', 'f', 'Q', 'a', 'J'}

for item, record_path in enumerate(path_split):
    # Read the first channel of the record and its 'atr' annotations.
    record = wfdb.rdrecord(record_path)
    record_dict = record.__dict__
    signal = record_dict['p_signal'][:, 0]
    annotation = wfdb.rdann(record_path, 'atr')
    ann_dict = annotation.__dict__
    symbol = ann_dict['symbol']
    peaks = ann_dict['sample']
    name = ann_dict['record_name']
    fs = ann_dict['fs']

    # Beat window: 0.25 s before the annotated sample, 0.45 s after it.
    # Built-in int() replaces the np.int alias, which NumPy has removed.
    t1 = int(0.25 * fs)
    t2 = int(0.45 * fs)
    peak = np.arange(len(peaks))

    new_signal = utility.wavelet_transform(signal, 8, 'sym5')
    new_signal = normalize_bound(new_signal)

    beats = []
    labels = []
    for x in peak:
        # Only keep annotations whose window lies inside the signal.
        if (peaks[x] - t1) > 0 and (peaks[x] + t2) < len(signal):
            # Segmentation step
            if symbol[x] in _SKIPPED_SYMBOLS:
                continue
            beat = new_signal[peaks[x] - t1:peaks[x] + t2]
def _dg_record_rows(record_file, label, record_name):
    """Return 100 one-row DataFrames for a record, or None if it is discarded."""
    # load record and resample its first channel to 200 Hz
    signal, info = wfdb.rdsamp(record_file)
    fs = 200
    signal = processing.resample_sig(signal[:, 0], info['fs'], fs)[0]

    # set some parameters
    window_size_half = int(fs * 0.125 / 2)
    max_bpm = 230
    n_beats = 8
    n_rows = 100

    # detect QRS peaks
    qrs_inds = processing.gqrs_detect(signal, fs=fs)
    search_radius = int(fs * 60 / max_bpm)
    corrected_qrs_inds = processing.correct_peaks(
        signal,
        peak_inds=qrs_inds,
        search_radius=search_radius,
        smooth_window_size=150)

    # One entry per inner beat (first and last beat are never used): the
    # (start, end) window around the beat, or None when that window reaches
    # past a neighbouring beat.
    windows = []
    for i in range(1, len(corrected_qrs_inds) - 1):
        start_ind = corrected_qrs_inds[i] - window_size_half
        end_ind = corrected_qrs_inds[i] + window_size_half + 1
        if (start_ind < corrected_qrs_inds[i - 1]
                or end_ind > corrected_qrs_inds[i + 1]):
            windows.append(None)
        else:
            windows.append((start_ind, end_ind))

    usable = [w for w in windows if w is not None]
    # remove outliers: too few isolated beats to pick from
    if len(usable) < n_beats:
        return None

    average_qrs = sum(signal[start:end] for start, end in usable) / len(usable)

    # Correlation of every beat with the average beat; overlapping beats get
    # a sentinel far below any real correlation so they are never selected.
    corrcoefs = [
        -100 if w is None else pearsonr(signal[w[0]:w[1]], average_qrs)[0]
        for w in windows
    ]

    # Rank positions instead of looking values up with list.index(), which
    # returns the same position repeatedly when correlations tie.
    max_corr = heapq.nlargest(n_beats, range(len(corrcoefs)),
                              key=corrcoefs.__getitem__)
    index_corr = random.sample(
        list(itertools.permutations(max_corr, n_beats)), n_rows)

    rows = []
    for index in index_corr:
        beats = []
        for i in index:
            # corrcoefs position i belongs to beat i + 1.
            start_ind = corrected_qrs_inds[i + 1] - window_size_half
            end_ind = corrected_qrs_inds[i + 1] + window_size_half + 1
            beats.append(
                processing.normalize_bound(signal[start_ind:end_ind], -1, 1))
        # float64 regardless of the dtype normalize_bound returns
        signal_temp = np.asarray(np.concatenate(beats), dtype=float)

        record_temp = pd.DataFrame(
            signal_temp.reshape(-1, signal_temp.shape[0]))
        record_temp['label'] = label
        record_temp['record'] = record_name
        rows.append(record_temp)
    return rows


def dataGeneration(data_path, csv_path, record_path):
    """
    Build a beat dataset from WFDB records and write it to a CSV file.

    For every record the first channel is resampled to 200 Hz, QRS peaks are
    detected, and the 8 isolated beats that correlate best with the record's
    average beat are selected. 100 randomly sampled orderings of these 8
    beats (each beat normalized to (-1, 1)) become 100 rows of the dataset.
    Records with fewer than 8 isolated beats are reported and discarded.

    Parameters
    ----------
    data_path : str
        Directory that holds the data. With ``record_path`` set it is
        concatenated directly with the folder name, so it must end with a
        path separator in that case.
    csv_path : str
        Destination of the resulting CSV file.
    record_path : str or None
        ``None``: the '.hea' records lie directly in ``data_path`` and the
        record name is used as label. Otherwise: every folder in
        ``data_path`` whose name starts with ``record_path`` is treated as
        one patient and the folder name is used as label.

    Returns
    -------
    None
        The dataset (columns 'label', 'record', then the samples) is
        written to ``csv_path``.
    """
    # Start from the empty header frame so 'label' and 'record' come first
    # and an empty result still has both columns.
    frames = [pd.DataFrame(columns=['label', 'record'])]
    Bar.check_tty = False

    if record_path is None:
        detail_path = data_path + '/'
        record_files = [
            i.split('.')[0] for i in os.listdir(detail_path)
            if (not i.startswith('.') and i.endswith('.hea'))
        ]
        bar = Bar('Processing',
                  max=len(record_files),
                  fill='#',
                  suffix='%(percent)d%%')

        # a loop for each record
        for record_name in record_files:
            rows = _dg_record_rows(detail_path + record_name, record_name,
                                   record_name)
            if rows is None:
                print('\noutlier detected, discard ' + record_name)
            else:
                frames.extend(rows)
            # Advance for discarded records too, so the bar reaches 100%.
            bar.next()
        bar.finish()
    else:
        patient_folders = [
            i for i in os.listdir(data_path)
            if (not i.startswith('.') and i.startswith(record_path))
        ]
        bar = Bar('Processing',
                  max=len(patient_folders),
                  fill='#',
                  suffix='%(percent)d%%')

        # a loop for each patient
        for patient_name in patient_folders:
            detail_path = data_path + patient_name + '/'
            record_files = [
                i.split('.')[0] for i in os.listdir(detail_path)
                if i.endswith('.hea')
            ]

            # a loop for each record
            for record_name in record_files:
                rows = _dg_record_rows(detail_path + record_name,
                                       patient_name, record_name)
                if rows is None:
                    print('\noutlier detected, discard ' + record_name +
                          ' of ' + patient_name)
                    continue
                frames.extend(rows)
            bar.next()
        bar.finish()

    # A single concat replaces the per-row DataFrame.append calls, which no
    # longer exist in pandas 2.x and copied the whole dataset on every row.
    dataset = pd.concat(frames, ignore_index=True, sort=False)

    # save for further use
    dataset.to_csv(csv_path, index=False)
    print('processing completed')
def ecg_generator(signals, peaks, labels, ma, bw, win_size, batch_size):
    """
    Endlessly yield batches of noisy ECG windows with R-peak label vectors.

    Every batch entry is produced like this:
    1. Draw a random signal and a random window of ``win_size`` samples.
    2. Collect the peaks lying at least three samples away from both window
       edges. Draw again unless there is at least one such peak and every
       one of them has label 1 (normal beat).
    3. Build the label vector: zeros everywhere, the peak label at each
       peak and at the two samples on either side of it.
    4. Normalize the window to (-1, 1), add the output of ``get_noise`` and
       normalize the sum to (-1, 1) again.

    Parameters
    ----------
    signals : list
        List of ECG signals
    peaks : list
        List of peak locations for the ECG signals
    labels : list
        List of labels (peak types) for the peaks
    ma : array
        Muscle artifact signal
    bw : array
        Baseline wander signal
    win_size : int
        Number of time steps in the training window
    batch_size : int
        Number of training examples in the batch

    Yields
    ------
    (X, y) : tuple
        Noisy windows and their integer label vectors, each reshaped to
        (batch, window length, 1).
    """
    # Offsets around a peak that receive the peak's label.
    label_offsets = (0, 1, 2, -1, -2)

    while True:
        batch_windows, batch_targets = [], []

        while len(batch_windows) < batch_size:
            sig_idx = np.random.randint(0, len(signals))
            ecg = signals[sig_idx]
            sig_peaks = peaks[sig_idx]
            sig_labels = labels[sig_idx]

            # Select one window
            beg = np.random.randint(ecg.shape[0] - win_size)
            end = beg + win_size

            # A margin of 3 samples to the window edges is needed because
            # labels are also written next to each peak.
            inside = (sig_peaks >= beg + 3) & (sig_peaks <= end - 3)
            p_in_win = sig_peaks[inside] - beg

            # The window must contain at least one peak ...
            if p_in_win.shape[0] < 1:
                continue

            # ... and only normal beats.
            lab_in_win = sig_labels[inside]
            if not np.all(lab_in_win == 1):
                continue

            # Mark each peak and its neighbouring samples.
            window_labels = np.zeros(win_size)
            for offset in label_offsets:
                np.put(window_labels, p_in_win + offset, lab_in_win)

            # Normalize, add noise, normalize again.
            clean = normalize_bound(ecg[beg:end], lb=-1, ub=1)
            noisy = clean + get_noise(ma, bw, win_size)
            noisy = normalize_bound(noisy, lb=-1, ub=1)

            batch_windows.append(noisy)
            batch_targets.append(window_labels)

        features = np.asarray(batch_windows)
        targets = np.asarray(batch_targets)

        features = features.reshape(features.shape[0], features.shape[1], 1)
        targets = targets.reshape(targets.shape[0], targets.shape[1], 1)
        targets = targets.astype(int)

        yield (features, targets)