def __data_generation(self, list_IDs_temp):
    """Build one batch of normalized spectrograms and one-hot labels.

    Args:
        list_IDs_temp: Dataset names for this batch. Each name is used as a
            key into both ``self.h5file`` and ``self.labels``.

    Returns:
        A tuple ``(X, y)``. ``X`` is a float array of shape
        ``(len(list_IDs_temp), *self.dim, self.n_channels)``; ``y`` is the
        result of ``keras.utils.to_categorical`` with ``self.n_classes``
        classes.
    """
    batch_ids = list(list_IDs_temp)

    # Size the buffers by the IDs actually supplied rather than by
    # self.batch_size: np.empty does not initialize memory, so a short batch
    # would otherwise return garbage samples and garbage labels.
    n_samples = len(batch_ids)
    X = np.empty((n_samples, *self.dim, self.n_channels), dtype=float)
    y = np.empty(n_samples, dtype=int)

    for i, ID in enumerate(batch_ids):
        # First column of the stored recording, brought to sequence_length
        # by extend_ts, then reshaped to a single-row 2-D array.
        data = extend_ts(self.h5file[ID]['ecgdata'][:, 0], self.sequence_length)
        data = np.reshape(data, (1, len(data)))

        if self.augment:
            # Dropout bursts
            data = zero_filter(data, threshold=2, depth=10)
            # Random resampling
            data = random_resample(data)

        # spectrogram() returns a sequence; element 2 is the spectrogram array.
        data_spectrogram = spectrogram(
            data, nperseg=self.nperseg, noverlap=self.noverlap)[2]

        # Normalize with the statistics stored on the generator.
        data_transformed = norm_float(
            data_spectrogram, self.data_mean, self.data_std)

        # Append the channel axis.
        # NOTE(review): axis=3 requires a 3-D spectrogram array — confirm
        # against spectrogram()'s return shape.
        X[i,] = np.expand_dims(data_transformed, axis=3)

        # Assuming that the dataset names are unique (only 1 per label)
        y[i] = self.labels[ID]

    return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
def __data_generation(self, list_IDs_temp):
    """Build one batch of per-sample normalized spectrograms and labels.

    Each spectrogram is standardized with its own mean and standard
    deviation (not with dataset-level statistics).

    Args:
        list_IDs_temp: Dataset names for this batch. Each name is used as a
            key into both ``self.h5file`` and ``self.labels``.

    Returns:
        A tuple ``(X, y)``. ``X`` is a float array of shape
        ``(len(list_IDs_temp), *self.dim, self.n_channels)``; ``y`` is the
        result of ``keras.utils.to_categorical`` with ``self.n_classes``
        classes.
    """
    batch_ids = list(list_IDs_temp)

    # Size the buffers by the IDs actually supplied rather than by
    # self.batch_size: np.empty does not initialize memory, so a short batch
    # would otherwise return garbage samples and garbage labels.
    n_samples = len(batch_ids)
    X = np.empty((n_samples, *self.dim, self.n_channels), dtype=float)
    y = np.empty(n_samples, dtype=int)

    for i, ID in enumerate(batch_ids):
        # First column of the stored recording, brought to sequence_length
        # by extend_ts, then reshaped to a single-row 2-D array.
        data = extend_ts(self.h5file[ID]['ecgdata'][:, 0], self.sequence_length)
        data = np.reshape(data, (1, len(data)))

        if self.augment:
            # Dropout bursts
            data = zero_filter(data, threshold=2, depth=10)
            # Random resampling
            data = random_resample(data)

        # spectrogram() returns a sequence; element 2 is the spectrogram array.
        data_spectrogram = spectrogram(
            data, nperseg=self.nperseg, noverlap=self.noverlap)[2]

        # Standardize this spectrogram with its own statistics. A constant
        # spectrogram has zero spread; divide by 1 in that case so the result
        # is all zeros instead of NaN.
        spread = np.std(data_spectrogram)
        if spread == 0:
            spread = 1.0
        data_norm = (data_spectrogram - np.mean(data_spectrogram)) / spread

        # Append the channel axis.
        # NOTE(review): axis=3 requires a 3-D spectrogram array — confirm
        # against spectrogram()'s return shape.
        X[i,] = np.expand_dims(data_norm, axis=3)

        # Assuming that the dataset names are unique (only 1 per label)
        y[i] = self.labels[ID]

    return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
# Finish the figure started before this chunk (`f` and `fig` are created
# earlier; judging by the title and file name this is the sequence-length
# histogram — confirm against the preceding code).
f.set(ylabel='Counts', ylim=[0, 200], yticks=np.arange(0, 250, 50))
plt.title('Distribution of sequence lengths', fontsize=15)
plt.show()
# NOTE(review): this figure is saved after plt.show(), whereas the ECG plot
# below saves before showing; consider using the same order in both places.
fig.savefig('physionet_sequenceLenHist.png', bbox_inches='tight', dpi=150)

# Based on this, we can set some parameters that we will use in the future
fs = sampling_rates[0]  # universal sampling rate (assumes every recording shares it — TODO confirm)
sequence_length = sequence_length_max  # will use the maximum sequence length

from physionet_processing import extend_ts

# Fetch one time series (first column of 'ecgdata') from the hdf5 file
ts = h5file[dataset_list[15]]['ecgdata'][:, 0]
# Extend it to the maximum length (presumably by zero padding, per the plot
# title below — see extend_ts)
ts_extended = extend_ts(ts, length=sequence_length_max)
# Sample index divided by the sampling rate gives the time axis in seconds
time = np.arange(0, len(ts_extended)) / fs

# Plot the extended time series
fig, ax1 = plt.subplots(figsize=(15, 3))
ax1.plot(time, ts_extended, 'b')
ax1.set(xlabel='Time [s]', xlim=[0, time[-1]], xticks=np.arange(0, time[-1] + 5, 10))
ax1.set(ylabel='Potential [mV]')
plt.title('Example ECG sequence with zero padding', fontsize=15)
fig.savefig('physionet_ECG_padding.png', bbox_inches='tight', dpi=150)
plt.show()

from scipy import signal

# Periodogram of the extended signal using a flat-top window;
# scaling='density' requests a power spectral density.
# f1 holds the sample frequencies, PSD the corresponding density values.
f1, PSD = signal.periodogram(ts_extended, fs, 'flattop', scaling='density')