Example No. 1
    def load(self, backup_path=None, num_records=None, checkpoint_interval=100):
        samples_per_second = self.samples_per_second
        total_sample_size_seconds = self.sample_size_seconds * self.seq_len
        to_wavelet = self.to_wavelet

        if backup_path is not None:
            filename = ('{}.pt' if self.to_wavelet is None else '{}_transformed.pt').format(self.dataset_name)
            file_path = os.path.join(backup_path, filename)
            if not os.path.exists(backup_path):
                os.makedirs(backup_path)
        else:
            file_path = None

        if file_path is not None and os.path.isfile(file_path):
            data = torch.load(file_path)
            print('Loaded from backup')
            self.samples, self.labels = data['samples'], data['labels']
            return

        print('Reading records. sample size={}, frequency={}'.format(total_sample_size_seconds, samples_per_second))
        data, labels = read_records(self.dataset_name, self.data_path,
                                    sample_size_seconds=total_sample_size_seconds,
                                    samples_per_second=samples_per_second, num_records=num_records)
        labels = torch.tensor(labels)

        print(labels.shape)
        print(len(data))

        data = [split_sample(sample, self.sample_size_seconds) for sample in data]
        count = len(data)
        transformed_data = []

        print('Preparing {} samples'.format(count))

        num_samples = 0
        for sample in tqdm(data, total=count, desc='Preprocessing examples'):
            wavelets = []

            for signal in sample:
                signal = wfdb_processing.normalize_bound(signal.numpy())
                if to_wavelet is not None:
                    sw = to_wavelet(signal)
                else:
                    sw = signal
                wavelets.append(torch.tensor(sw))

            t = torch.stack(wavelets)
            transformed_data.append(t)
            num_samples += 1

        transformed_data = torch.stack(transformed_data)
        if backup_path is not None:
            torch.save({
                'samples': transformed_data,
                'labels': labels
            }, file_path)

        self.samples, self.labels = transformed_data, labels
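The load method above depends on a split_sample helper that is not shown. A minimal sketch of what such a helper might look like, assuming each record is a 1-D tensor and the sampling rate is known to the caller (the name's signature and the default rate here are assumptions, not original code):

import torch

def split_sample(sample, sample_size_seconds, samples_per_second=250):
    # Hypothetical helper: chop one long recording into consecutive,
    # non-overlapping windows of sample_size_seconds each.
    window = int(sample_size_seconds * samples_per_second)
    n_windows = sample.shape[-1] // window
    return [sample[..., i * window:(i + 1) * window] for i in range(n_windows)]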
Example No. 2
    def test_normalize_bound(self):
        sig, _ = wfdb.rdsamp('sample-data/100')
        lb = -5
        ub = 15

        x = processing.normalize_bound(sig[:, 0], lb, ub)
        assert x.shape[0] == sig.shape[0]
        assert np.min(x) >= lb
        assert np.max(x) <= ub
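The assertions pin down the contract of processing.normalize_bound: a linear min-max rescaling of the signal into [lb, ub]. A self-contained sketch of an equivalent rescaling (illustrative, not the library's source; it assumes a non-constant signal):

import numpy as np

def normalize_bound_sketch(sig, lb=0, ub=1):
    # Linear min-max rescaling into [lb, ub]; matches the behaviour the
    # test above asserts for wfdb.processing.normalize_bound.
    sig = np.asarray(sig, dtype=float)
    mn, mx = sig.min(), sig.max()
    return (sig - mn) / (mx - mn) * (ub - lb) + lb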
Example No. 3
def tsaug_generator(X_all, y_all, batch_size):
    """
    Generate Time Series data with sequence labels
    Data generator that yields training data as batches.

    1. Randomly selects one sample from time series signals
    2. Applies time series augmentations to X and Y
    3. Normalizes result for X data
    4. Reshapes data into correct format for training


    Parameters
    ----------
    X_all : 3D numpy array
        (N, seqlen, features=1)

    y_all : 3D numpy array (binary labels for my case)
        (N, seqlen, classes=1)
    batch_size : int
        Number of training examples in the batch

    Yields
    ------
    (X, y) : tuple
        Contains training samples with corresponding labels
    """
    while True:

        X = []
        y = []

        while len(X) < batch_size:
            random_sig_idx = np.random.randint(0, X_all.shape[0])
            x1 = X_all[random_sig_idx].flatten()
            y1 = y_all[random_sig_idx].flatten()

            # Augment X and y, then normalize X again

            X_aug, y_aug = my_augmenter.augment(x1, y1)
            X_aug = normalize_bound(X_aug, lb=-1, ub=1)

            X.append(X_aug)
            y.append(y_aug)

        X = np.asarray(X)
        y = np.asarray(y)

        X = X.reshape(X.shape[0], X.shape[1], 1)
        y = y.reshape(y.shape[0], y.shape[1], 1).astype(int)

        yield (X, y)
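tsaug_generator assumes a module-level my_augmenter and the normalize_bound helper in scope. One plausible augmenter definition plus a smoke test; the pipeline and its parameters are illustrative, since the original augmenter is not shown:

import numpy as np
from tsaug import TimeWarp, AddNoise

# Illustrative pipeline; any tsaug augmenter exposing .augment(X, Y) fits.
my_augmenter = TimeWarp(n_speed_change=3) + AddNoise(scale=0.01)

X_all = np.random.randn(10, 1000, 1)
y_all = (np.random.rand(10, 1000, 1) > 0.9).astype(int)

X_batch, y_batch = next(tsaug_generator(X_all, y_all, batch_size=4))
print(X_batch.shape, y_batch.shape)  # (4, 1000, 1) (4, 1000, 1)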
Example No. 4
def ecg_generator(name, signals, wgn, ma, bw, win_size, batch_size):
    """
       Generate ECG data with R-peak labels.

       Data generator that yields training data as batches. Every instance
       of training batch is composed as follows:
       1. Randomly select one ECG signal from given list of ECG signals
       2. Randomly select one window of given win_size from selected signal
       3. Check that window has at least one beat and that all beats are
          labled as normal
       4. Create label window corresponding the selected window
           -beats and four samples next to beats are labeled as 1 while
            rest of the samples are labeled as 0
       5. Normalize selected signal window from -1 to 1
       6. Add noise into signal window and normalize it again to (-1, 1)
       7. Add noisy signal and its labels to trainig batch
       8. Transform training batches to arrays of needed shape and yield
          training batch with corresponding labels when needed

       Parameters
       ----------
       signals : list
           List of ECG signals
       peaks : list
           List of peaks locations for the ECG signals
       labels : list
           List of labels (peak types) for the peaks
       ma : array
           Muscle artifact signal
       bw : array
           Baseline wander signal
       win_size : int
           Number of time steps in the training window
       batch_size : int
           Number of training examples in the batch

       Yields
       ------
       (X, y) : tuple
           Contains training samples with corresponding labels

       """

    print('processing')
    while True:
        x = []
        section = []
        noise_list = []
        while len(x) < batch_size:
            random_sig_idx = np.random.randint(0, len(signals))
            random_sig = signals[random_sig_idx]
            #print(random_sig_idx)

            # Select one window
            beg = np.random.randint(random_sig.shape[0] - win_size)
            end = beg + win_size
            #section = normalize_bound(section, lb=-1, ub=1)
            section.append(random_sig[beg:end])

            # Select data for window and normalize it (-1, 1)
            data_win = normalize_bound(random_sig[beg:end], lb=-1, ub=1)

            # Add noise into data window and normalize it again
            added_noise = get_noise(name, wgn, ma, bw, win_size)
            added_noise = normalize_bound(added_noise, lb=-1, ub=1)
            noise_list.append(added_noise)
            data_win = data_win + added_noise
            data_win = normalize_bound(data_win, lb=-1, ub=1)
            x.append(data_win)

        section = np.asarray(section)
        section = section.reshape(section.shape[0], section.shape[1], 1)
        #print(section, section.shape, type(section))
        x = np.asarray(x)
        print(x.shape)
        x = x.reshape(x.shape[0], x.shape[1], 1)
        #print(x)

        idx = np.random.randint(0, len(x))
        plt.subplot(311)
        plt.plot(section[idx])
        plt.title('original ' + name + ' ecg')
        plt.subplot(312)
        plt.plot(x[idx])
        plt.title('noised ' + name + ' ecg')
        plt.subplot(313)
        plt.title('added noise')
        plt.plot(noise_list[idx])
        #plt.savefig(name+'.png')
        plt.show()

        print('shape:', x.shape)
        serialization(name + '_original_ecg', section)
        serialization(name + '_noised_ecg', x)
        return x, section
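get_noise is not shown in this variant. A hedged sketch of one way it might combine the noise sources, matching the call signature above; treating wgn as a white-noise standard deviation and cropping random windows from the artifact records are assumptions:

import numpy as np

def get_noise(name, wgn, ma, bw, win_size):
    # Illustrative only: crop a random window from each artifact record,
    # add white Gaussian noise with standard deviation wgn, and sum them.
    # name is only used by the caller for plot titles and file names.
    beg_ma = np.random.randint(len(ma) - win_size)
    beg_bw = np.random.randint(len(bw) - win_size)
    noise = ma[beg_ma:beg_ma + win_size] + bw[beg_bw:beg_bw + win_size]
    return noise + np.random.normal(0, wgn, win_size)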
Example No. 5
def utama(filename):
    global timenow
    path_data = filename
    path_model = "Best Model 3 Classes.h5"
    ext = path_data.split(".")[-1]
    namefile = os.path.splitext(os.path.basename(path_data))[0]  # robust to any path depth
    print(filename, ext, namefile)

    if ext == 'xml':
        with open(path_data) as fopen:
            file = fopen.read()
        data = xmltodict.parse(file, encoding='latin-1')
        raw = data['CardioXP']['StudyInfo']['WaveData'][1]['Data']
        signal = raw.split(' ')
        signal = np.array(signal, dtype=int)

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), signal[:2700])
        saving_file_bef = './app/static/AF/' + namefile + '-' + timenow + '-bef.png'
        plt.savefig(saving_file_bef)

        signal = normalize_bound(signal, lb=0, ub=1)
        signal = denoising(signal)
        signal = signal[0:2700]

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), signal[:2700])
        saving_file_aft = './app/static/AF/' + namefile + '-' + timenow + '-aft.png'
        plt.savefig(saving_file_aft)
    elif ext == 'dat':
        # wfdb expects the record name without the file extension
        record = wfdb.rdrecord(os.path.splitext(path_data)[0])
        p_signal = record.p_signal[:, 0]

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), p_signal[:2700])
        saving_file_bef = './app/static/AF/' + namefile + '-' + timenow + '-bef.png'
        plt.savefig(saving_file_bef)

        p_signal = normalize_bound(p_signal, lb=0, ub=1)
        p_signal = denoising(p_signal)
        signal = p_signal[0:2700]

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), signal[:2700])
        saving_file_aft = './app/static/AF/' + namefile + '-' + timenow + '-aft.png'
        plt.savefig(saving_file_aft)
    elif ext == 'mat':
        data = scipy.io.loadmat(path_data)
        samples = data['val'][0]

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), samples[:2700])
        saving_file_bef = './app/static/AF/' + namefile + '-' + timenow + '-bef.png'
        plt.savefig(saving_file_bef)

        samples = normalize_bound(samples, lb=0, ub=1)
        samples = denoising(samples)
        signal = samples[0:2700]

        plt.figure(figsize=(15, 10))
        plt.plot(range(2700), signal[:2700])
        saving_file_aft = './app/static/AF/' + namefile + '-' + timenow + '-aft.png'
        plt.savefig(saving_file_aft)
    else:
        sys.exit("Not a valid data format")

    if len(signal) < 2700:
        sys.exit("Signal length is not sufficient")

    signal = np.reshape(signal, (1, 2700, 1))
    model = load_model(path_model)
    # predict_classes was removed from Keras; argmax over predict is equivalent
    predicted_class = np.argmax(model.predict(signal), axis=-1)[0]

    clear_session()
    if predicted_class == 0:
        label = 'Normal'
    elif predicted_class == 1:
        label = 'Atrial Fibrillation'
    else:
        label = 'Non AF'
    print(label)
    return label, namefile + '-' + timenow
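A minimal usage sketch with a hypothetical upload path; it assumes the model file is present and that the global timenow is set before the call:

timenow = '20240101-120000'  # normally set elsewhere in the app
label, image_id = utama('./app/static/uploads/record100.mat')
print(label)     # 'Normal', 'Atrial Fibrillation' or 'Non AF'
print(image_id)  # '<file name>-<timenow>', locates the saved before/after plots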
Example No. 6
for item in range(len(path_split)):
    record = wfdb.rdrecord(path_split[item])
    signal = record.p_signal[:, 0]
    annotation = wfdb.rdann(path_split[item], 'atr')
    symbol = annotation.symbol
    peaks = annotation.sample
    name = annotation.record_name
    fs = annotation.fs
    t1 = int(0.25 * fs)  # np.int was removed in NumPy 1.24
    t2 = int(0.45 * fs)
    peak = np.arange(len(peaks))

    new_signal = utility.wavelet_transform(signal, 8, 'sym5')
    new_signal = normalize_bound(new_signal)

    beats = []
    labels = []
    for x in peak:
        # Keep only beats whose window fits inside the signal (segmentation)
        if (peaks[x] - t1) > 0 and (peaks[x] + t2) < len(signal):
            # Skip non-beat and unwanted annotation symbols
            # (an earlier whitelist kept A, L, N, ., P, R, V, f, F, ! and j)
            if symbol[x] in ('~', '|', '+', 'B', 'F', 'f', 'Q', 'a', 'J'):
                continue
            beat = new_signal[peaks[x] - t1:peaks[x] + t2]
            beats.append(beat)
            labels.append(symbol[x])
Example No. 7
def dataGeneration(data_path, csv_path, record_path):

    # initialize dataset
    dataset = pd.DataFrame(columns=['label', 'record'])

    if record_path is None:

        # a loop for each patient
        detail_path = data_path + '/'
        record_files = [
            i.split('.')[0] for i in os.listdir(detail_path)
            if (not i.startswith('.') and i.endswith('.hea'))
        ]

        Bar.check_tty = False
        bar = Bar('Processing',
                  max=len(record_files),
                  fill='#',
                  suffix='%(percent)d%%')

        # a loop for each record
        for record_name in record_files:

            # load record
            signal, info = wfdb.rdsamp(detail_path + record_name)

            fs = 200

            signal = processing.resample_sig(signal[:, 0], info['fs'], fs)[0]

            # set some parameters
            window_size_half = int(fs * 0.125 / 2)
            max_bpm = 230

            # detect QRS peaks
            qrs_inds = processing.gqrs_detect(signal, fs=fs)
            search_radius = int(fs * 60 / max_bpm)
            corrected_qrs_inds = processing.correct_peaks(
                signal,
                peak_inds=qrs_inds,
                search_radius=search_radius,
                smooth_window_size=150)

            average_qrs = 0
            count = 0
            for i in range(1, len(corrected_qrs_inds) - 1):
                start_ind = corrected_qrs_inds[i] - window_size_half
                end_ind = corrected_qrs_inds[i] + window_size_half + 1
                if (start_ind < corrected_qrs_inds[i - 1]
                        or end_ind > corrected_qrs_inds[i + 1]):
                    continue
                average_qrs = average_qrs + signal[start_ind:end_ind]
                count = count + 1

            # remove outliers
            if count < 8:
                print('\noutlier detected, discard ' + record_name)
                continue

            average_qrs = average_qrs / count

            corrcoefs = []
            for i in range(1, len(corrected_qrs_inds) - 1):
                start_ind = corrected_qrs_inds[i] - window_size_half
                end_ind = corrected_qrs_inds[i] + window_size_half + 1
                if (start_ind < corrected_qrs_inds[i - 1]
                        or end_ind > corrected_qrs_inds[i + 1]):
                    corrcoefs.append(-100)
                    continue
                corrcoef = pearsonr(signal[start_ind:end_ind], average_qrs)[0]
                corrcoefs.append(corrcoef)

            max_corr = list(map(corrcoefs.index, heapq.nlargest(8, corrcoefs)))

            index_corr = random.sample(
                list(itertools.permutations(max_corr, 8)), 100)

            for index in index_corr:
                # concatenate the eight selected beats into one row
                signal_temp = []

                for i in index:
                    start_ind = corrected_qrs_inds[i + 1] - window_size_half
                    end_ind = corrected_qrs_inds[i + 1] + window_size_half + 1
                    sig = processing.normalize_bound(signal[start_ind:end_ind],
                                                     -1, 1)
                    signal_temp = np.concatenate((signal_temp, sig))

                # a temp dataframe to store one record
                record_temp = pd.DataFrame(
                    signal_temp.reshape(1, signal_temp.shape[0]))
                record_temp['label'] = record_name
                record_temp['record'] = record_name

                # add it to the final dataset (pd.concat replaces the
                # DataFrame.append method removed in pandas 2.0)
                dataset = pd.concat([dataset, record_temp],
                                    ignore_index=True,
                                    sort=False)

            bar.next()
        bar.finish()
    else:
        patient_folders = [
            i for i in os.listdir(data_path)
            if (not i.startswith('.') and i.startswith(record_path))
        ]

        Bar.check_tty = False
        bar = Bar('Processing',
                  max=len(patient_folders),
                  fill='#',
                  suffix='%(percent)d%%')
        # a loop for each patient
        for patient_name in patient_folders:
            detail_path = data_path + patient_name + '/'
            record_files = [
                i.split('.')[0] for i in os.listdir(detail_path)
                if i.endswith('.hea')
            ]

            # a loop for each record
            for record_name in record_files:

                # load record
                signal, info = wfdb.rdsamp(detail_path + record_name)

                fs = 200

                signal = processing.resample_sig(signal[:, 0], info['fs'],
                                                 fs)[0]

                # set some parameters
                window_size_half = int(fs * 0.125 / 2)
                max_bpm = 230

                # detect QRS peaks
                qrs_inds = processing.gqrs_detect(signal, fs=fs)
                search_radius = int(fs * 60 / max_bpm)
                corrected_qrs_inds = processing.correct_peaks(
                    signal,
                    peak_inds=qrs_inds,
                    search_radius=search_radius,
                    smooth_window_size=150)

                average_qrs = 0
                count = 0
                for i in range(1, len(corrected_qrs_inds) - 1):
                    start_ind = corrected_qrs_inds[i] - window_size_half
                    end_ind = corrected_qrs_inds[i] + window_size_half + 1
                    if (start_ind < corrected_qrs_inds[i - 1]
                            or end_ind > corrected_qrs_inds[i + 1]):
                        continue
                    average_qrs = average_qrs + signal[start_ind:end_ind]
                    count = count + 1

                # remove outliers
                if count < 8:
                    print('\noutlier detected, discard ' + record_name +
                          ' of ' + patient_name)
                    continue

                average_qrs = average_qrs / count

                corrcoefs = []
                for i in range(1, len(corrected_qrs_inds) - 1):
                    start_ind = corrected_qrs_inds[i] - window_size_half
                    end_ind = corrected_qrs_inds[i] + window_size_half + 1
                    if (start_ind < corrected_qrs_inds[i - 1]
                            or end_ind > corrected_qrs_inds[i + 1]):
                        corrcoefs.append(-100)
                        continue
                    corrcoef = pearsonr(signal[start_ind:end_ind],
                                        average_qrs)[0]
                    corrcoefs.append(corrcoef)

                max_corr = list(
                    map(corrcoefs.index, heapq.nlargest(8, corrcoefs)))

                index_corr = random.sample(
                    list(itertools.permutations(max_corr, 8)), 100)

                for index in index_corr:
                    # concatenate the eight selected beats into one row
                    signal_temp = []

                    for i in index:
                        start_ind = corrected_qrs_inds[i + 1] - window_size_half
                        end_ind = corrected_qrs_inds[i + 1] + window_size_half + 1
                        sig = processing.normalize_bound(
                            signal[start_ind:end_ind], -1, 1)
                        signal_temp = np.concatenate((signal_temp, sig))

                    # a temp dataframe to store one record
                    record_temp = pd.DataFrame(
                        signal_temp.reshape(1, signal_temp.shape[0]))
                    record_temp['label'] = patient_name
                    record_temp['record'] = record_name

                    # add it to the final dataset (pd.concat replaces the
                    # DataFrame.append method removed in pandas 2.0)
                    dataset = pd.concat([dataset, record_temp],
                                        ignore_index=True,
                                        sort=False)

            bar.next()
        bar.finish()

    # save for further use
    dataset.to_csv(csv_path, index=False)

    print('processing completed')
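A usage sketch with hypothetical paths. Passing None as record_path processes a flat directory of records, while a folder-name prefix such as 'Person_' walks per-patient subfolders:

# Flat directory of .hea/.dat records:
dataGeneration('./mit-bih-data', './dataset.csv', None)

# Per-patient folders whose names start with the given prefix
# (note the trailing slash expected by the folder branch):
dataGeneration('./ecg-id-data/', './dataset_by_patient.csv', 'Person_')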
Example No. 8
def ecg_generator(signals, peaks, labels, ma, bw, win_size, batch_size):
    """
    Generate ECG data with R-peak labels.

    Data generator that yields training data as batches. Every instance
    of training batch is composed as follows:
    1. Randomly select one ECG signal from given list of ECG signals
    2. Randomly select one window of given win_size from selected signal
    3. Check that the window has at least one beat and that all beats are
       labeled as normal
    4. Create a label window corresponding to the selected window
        -beats and four samples next to beats are labeled as 1 while
         rest of the samples are labeled as 0
    5. Normalize selected signal window from -1 to 1
    6. Add noise into signal window and normalize it again to (-1, 1)
    7. Add noisy signal and its labels to training batch
    8. Transform training batches to arrays of needed shape and yield
       training batch with corresponding labels when needed

    Parameters
    ----------
    signals : list
        List of ECG signals
    peaks : list
        List of peaks locations for the ECG signals
    labels : list
        List of labels (peak types) for the peaks
    ma : array
        Muscle artifact signal
    bw : array
        Baseline wander signal
    win_size : int
        Number of time steps in the training window
    batch_size : int
        Number of training examples in the batch

    Yields
    ------
    (X, y) : tuple
        Contains training samples with corresponding labels

    """
    while True:

        X = []
        y = []

        while len(X) < batch_size:
            random_sig_idx = np.random.randint(0, len(signals))
            random_sig = signals[random_sig_idx]
            p4sig = peaks[random_sig_idx]
            plabels = labels[random_sig_idx]

            # Select one window
            beg = np.random.randint(random_sig.shape[0]-win_size)
            end = beg + win_size

            # Select peaks that fall into the selected window.
            # A buffer of 3 samples to the window edge is needed because
            # labels are also inserted next to each peak
            p_in_win = p4sig[(p4sig >= beg+3) & (p4sig <= end-3)]-beg

            # Check that there is at least one peak in the window
            if p_in_win.shape[0] >= 1:

                # Select labels that fall into selected window
                lab_in_win = plabels[(p4sig >= beg+3) & (p4sig <= end-3)]

                # Check that every beat in the window is normal beat
                if np.all(lab_in_win == 1):

                    # Create labels for data window
                    window_labels = np.zeros(win_size)
                    np.put(window_labels, p_in_win, lab_in_win)

                    # Put labels also next to peak
                    np.put(window_labels, p_in_win+1, lab_in_win)
                    np.put(window_labels, p_in_win+2, lab_in_win)
                    np.put(window_labels, p_in_win-1, lab_in_win)
                    np.put(window_labels, p_in_win-2, lab_in_win)

                    # Select data for window and normalize it (-1, 1)
                    data_win = normalize_bound(random_sig[beg:end],
                                               lb=-1, ub=1)

                    # Add noise into data window and normalize it again
                    data_win = data_win + get_noise(ma, bw, win_size)
                    data_win = normalize_bound(data_win, lb=-1, ub=1)

                    X.append(data_win)
                    y.append(window_labels)

        X = np.asarray(X)
        y = np.asarray(y)

        X = X.reshape(X.shape[0], X.shape[1], 1)
        y = y.reshape(y.shape[0], y.shape[1], 1).astype(int)

        yield (X, y)
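A smoke test with synthetic stand-ins, just to exercise the generator shape-wise; real use would load ECG records, peak locations and beat labels, and a get_noise(ma, bw, win_size) helper must be in scope (this variant's signature differs from the one sketched after Example No. 4):

import numpy as np

signals = [np.random.randn(5000)]
peaks = [np.arange(100, 4900, 200)]       # fake R-peak locations
labels = [np.ones(24, dtype=int)]         # all beats labeled normal
ma = np.random.randn(10000)               # stand-in muscle artifact record
bw = np.random.randn(10000)               # stand-in baseline wander record

gen = ecg_generator(signals, peaks, labels, ma, bw, win_size=1000, batch_size=4)
X, y = next(gen)  # X: (4, 1000, 1) float windows, y: (4, 1000, 1) 0/1 peak mask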