Example #1
0
def extract_rpeak_features(row, signal):
    """
    Compute R-peak based features and attach them to a dataset row.

    :param row: a `BaseDataset` row to calculate the features from
    :param signal: the raw ECG signal
    :return: `row` with the added features
    """
    fs = row.Fs

    # Clean the raw signal and detect the R peaks on the cleaned version.
    cleaned = nk.ecg_clean(signal, sampling_rate=fs)
    peaks, info = nk.ecg_peaks(cleaned, sampling_rate=fs)

    # Sample indices of the detected R peaks, then converted to seconds.
    peak_mask = peaks['ECG_R_Peaks'].to_numpy()
    peak_times = np.flatnonzero(peak_mask == 1).astype(np.float32)
    peak_times /= fs  # in-place division keeps the float32 dtype

    n_peaks = len(peak_times)
    if n_peaks > 2:
        # HRV indices need a minimum number of beats to be meaningful.
        # NOTE(review): `Series.append` was removed in pandas 2.0 —
        # this file presumably pins pandas < 2; confirm.
        hrv_row = nk.hrv(peaks, sampling_rate=fs, show=False).iloc[0]
        row = row.append(hrv_row)
    row['N_QRS'] = n_peaks

    # Summary statistics over the RR intervals and over the raw signal.
    rr_intervals = np.diff(peak_times)
    row = row.append(get_statistics(rr_intervals, 'RR'))
    row = row.append(get_statistics(signal, 'signal'))

    return row, info
Example #2
0
def test_hrv():
    """nk.hrv() must report every expected HRV index column."""
    # Fixed seed keeps the simulated beats — and thus the indices — stable.
    simulated = nk.ecg_simulate(duration=60,
                                sampling_rate=1000,
                                heart_rate=110,
                                random_state=42)

    _, peaks = nk.ecg_process(simulated, sampling_rate=1000)

    hrv_indices = nk.hrv(peaks, sampling_rate=1000)

    expected = [
        'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN',
        'HRV_CVSD', 'HRV_MedianNN', 'HRV_MadNN', 'HRV_MCVNN', 'HRV_IQRNN',
        'HRV_pNN50', 'HRV_pNN20', 'HRV_TINN', 'HRV_HTI', 'HRV_ULF', 'HRV_VLF',
        'HRV_LF', 'HRV_HF', 'HRV_VHF', 'HRV_LFHF', 'HRV_LFn', 'HRV_HFn',
        'HRV_LnHF', 'HRV_SD1', 'HRV_SD2', 'HRV_SD1SD2', 'HRV_S', 'HRV_CSI',
        'HRV_CVI', 'HRV_CSI_Modified', 'HRV_PIP', 'HRV_IALS', 'HRV_PSS',
        'HRV_PAS', 'HRV_GI', 'HRV_SI', 'HRV_AI', 'HRV_PI', 'HRV_C1d',
        'HRV_C1a', 'HRV_SD1d', 'HRV_SD1a', 'HRV_C2d', 'HRV_C2a', 'HRV_SD2d',
        'HRV_SD2a', 'HRV_Cd', 'HRV_Ca', 'HRV_SDNNd', 'HRV_SDNNa', 'HRV_ApEn',
        'HRV_SampEn'
    ]

    produced = np.array(hrv_indices.columns.values, dtype=object)
    for name in expected:
        assert name in produced
Example #3
0
def test_hrv():
    """Every column returned by nk.hrv() must belong to the known index set."""
    simulated = nk.ecg_simulate(duration=60,
                                sampling_rate=1000,
                                heart_rate=110,
                                random_state=42)

    _, peaks = nk.ecg_process(simulated, sampling_rate=1000)

    hrv_indices = nk.hrv(peaks, sampling_rate=1000)

    allowed = [
        'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN',
        'HRV_CVSD', 'HRV_MedianNN', 'HRV_MadNN', 'HRV_MCVNN', 'HRV_pNN50',
        'HRV_pNN20', 'HRV_TINN', 'HRV_HTI', 'HRV_ULF', 'HRV_VLF', 'HRV_LF',
        'HRV_HF', 'HRV_VHF', 'HRV_LFHF', 'HRV_LFn', 'HRV_HFn', 'HRV_LnHF',
        'HRV_SD1', 'HRV_SD2', 'HRV_SD2SD1', 'HRV_CSI', 'HRV_CVI',
        'HRV_CSI_Modified', 'HRV_SampEn'
    ]

    for column in np.array(hrv_indices.columns.values, dtype=str):
        assert column in allowed
Example #4
0
def neurokit_index(request):
    """Compute HRV indices from a PPG recording and render them as JSON.

    :param request: Django HTTP request
    :return: rendered ``neurokit/neurokit_index.html`` with the HRV
        indices serialized into the ``response`` context variable
    """
    # NOTE(review): hard-coded absolute path — should come from settings.
    data = pd.read_csv(
        "/Users/siyuqian/Study/django-docker/712AF22B_Mar11_14-07-59.csv")

    # Sampling rate of the recording. NOTE(review): the original code mixed
    # 50 and 100 Hz across the pipeline; 50 Hz (the rate it used when
    # processing the raw signal) is assumed here — confirm with the device.
    sampling_rate = 50

    # Bug fix: nk.ppg_process returns a (signals DataFrame, info) tuple,
    # which the original code passed straight into nk.ppg_clean. Clean the
    # raw PPG column directly instead.
    ppg_clean = nk.ppg_clean(data['PPG'], sampling_rate=sampling_rate)

    # Detect systolic peaks on the cleaned signal.
    peaks = nk.ppg_findpeaks(ppg_clean, sampling_rate=sampling_rate)

    # Compute HRV indices (show=True also draws the summary plot, as in
    # the original — side effect kept on purpose).
    hrv_indices = nk.hrv(peaks, sampling_rate=sampling_rate, show=True)

    result = hrv_indices.to_json()
    parsed = json.loads(result)
    context = {'response': json.dumps(parsed)}
    return render(request, 'neurokit/neurokit_index.html', context)
Example #5
0
             pd.read_csv("../../data/mit_normal/Rpeaks.csv"),
             pd.read_csv("../../data/fantasia/Rpeaks.csv")]

# Get results
# Accumulator for the per-recording HRV results across all databases.
all_results = pd.DataFrame()

for file in datafiles:
    # Each CSV bundles several databases; process them one at a time.
    for database in np.unique(file["Database"]):

        print(str(database))
        data = file[file["Database"] == database]

        for participant in np.unique(data["Participant"]):

            # Rows belonging to a single recording of this participant.
            data_participant = data[data["Participant"] == participant]
            # NOTE(review): assumes exactly one sampling rate per recording.
            sampling_rate = np.unique(data_participant["Sampling_Rate"])[0]
            rpeaks = data_participant["Rpeaks"].values

            # One row of HRV indices for this recording.
            results = nk.hrv(rpeaks, sampling_rate=sampling_rate)
            results["Participant"] = participant
            results["Database"] = database
            # Last R-peak sample index converted to minutes.
            results["Recording_Length"] = rpeaks[-1] / sampling_rate / 60

            all_results = pd.concat([all_results, results], axis=0)

all_results.to_csv("data.csv", index=False)




Example #6
0
def create_df(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Build a per-recording ECG feature table.

    Each row of `dataframe` is one NaN-padded raw ECG recording. For every
    recording this extracts peak amplitude/interval statistics, signal
    quality, HRV indices, frequency-band energies, a "typical" heartbeat
    template and its wavelet transform.

    :param dataframe: raw recordings, one per row, padded with NaN
    :return: DataFrame with one feature row per recording
    """
    # get lengths of signals for each sample: walk each row backwards to
    # find the last valid sample; everything after it is padding.
    lengths = []
    width = dataframe.shape[1]

    for row in dataframe.index.tolist():
        temp_width = width
        for item in dataframe.loc[row][::-1]:
            if not pd.isna(item) and isinstance(item, float):
                temp_width -= 1
                break

            temp_width -= 1

        lengths.append(temp_width)

    """
    README
    
    For the following features we measured: [mean, median, 5 % percentile, 95 % percentile, standard deviation]
    R-peak location were retrieved by nk.ecg_peaks
    Q-peak and S-location were retrieved by nk.ecg_delineate
    
    ?_ampl_*        ?-Peak amplitude
    ?_nr_peaks      number of ?-Peaks
    ?_diff_*        Interval between ?-Peaks
    QRS_diff_*      QRS duration
    len_*           length of signal
    Qual_*          quality of signal measured with nk.ecg_quality
    sign_*          signal
    
    Also the output from nk.hrv_time which contains different measurements for the heart rate variation (HRV*) was added
    
    Additionally one 'typical' heartbeat was greated (all length 180):
    
    MN_*            mean signal
    MD_*            median signal
    P5_*            5 % percentile signal
    P95_*           95 % percentile signal
    SD_*            standard deviation of signal
    """

    names = ['R_ampl_mean', 'R_ampl_median', 'R_ampl_perc5', 'R_ampl_perc95', 'R_ampl_sd', 'R_nr_peaks',
             'len_mean', 'len_median', 'len_perc5', 'len_perc95', 'len_sd',
             'sign_mean', 'sign_median', 'sign_perc5', 'sign_perc95', 'sign_sd',
             'Qual_mean', 'Qual_median', 'Qual_perc5', 'Qual_perc95', 'Qual_sd',
             'Q_ampl_mean', 'Q_ampl_median', 'Q_ampl_perc5', 'Q_ampl_perc95', 'Q_ampl_sd', 'Q_nr_peaks',
             'Q_diff_mean', 'Q_diff_median', 'Q_diff_perc5', 'Q_diff_perc95', 'Q_diff_sd',
             'S_ampl_mean', 'S_ampl_median', 'S_ampl_perc5', 'S_ampl_perc95', 'S_ampl_sd', 'S_nr_peaks',
             'S_diff_mean', 'S_diff_median', 'S_diff_perc5', 'S_diff_perc95', 'S_diff_sd',
             'P_ampl_mean', 'P_ampl_median', 'P_ampl_perc5', 'P_ampl_perc95', 'P_ampl_sd', 'P_nr_peaks',
             'T_ampl_mean', 'T_ampl_median', 'T_ampl_perc5', 'T_ampl_perc95', 'T_ampl_sd', 'T_nr_peaks',
             'QRS_diff_mean', 'QRS_diff_median', 'QRS_diff_perc5', 'QRS_diff_perc95', 'QRS_diff_sd',
             'PR_diff_mean', 'PR_diff_median', 'PR_diff_perc5', 'PR_diff_perc95', 'PR_diff_sd',
             'RT_diff_mean', 'RT_diff_median', 'RT_diff_perc5', 'RT_diff_perc95', 'RT_diff_sd',
             'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN', 'HRV_CVSD', 'HRV_MedianNN',
             'HRV_MadNN', 'HRV_MCVNN', 'HRV_IQRNN', 'HRV_pNN50', 'HRV_pNN20', 'HRV_TINN', 'HRV_HTI',
             'HRV_ULF', 'HRV_VLF', 'HRV_LF', 'HRV_HF', 'HRV_VHF', 'HRV_LFHF', 'HRV_LFn', 'HRV_HFn', 'HRV_LnHF',
             'HRV_SD1', 'HRV_SD2', 'HRV_SD1SD2', 'HRV_S', 'HRV_CSI', 'HRV_CVI', 'HRV_CSI_Modified', 'HRV_PIP',
             'HRV_IALS', 'HRV_PSS', 'HRV_PAS', 'HRV_GI', 'HRV_SI', 'HRV_AI', 'HRV_PI', 'HRV_C1d', 'HRV_C1a', 'HRV_SD1d',
             'HRV_SD1a', 'HRV_C2d', 'HRV_C2a', 'HRV_SD2d', 'HRV_SD2a', 'HRV_Cd', 'HRV_Ca', 'HRV_SDNNd', 'HRV_SDNNa', 'HRV_ApEn',
             'HRV_SampEn', 'J_LF', 'J_HF', 'J_L/H']

    # Column names for the 180-sample "typical" heartbeat templates.
    template_len = 180

    mean_names = ['MN_' + str(index) for index in range(template_len)]
    median_names = ['MD_' + str(index) for index in range(template_len)]
    perc5_names = ['P5_' + str(index) for index in range(template_len)]
    perc95_names = ['P95_' + str(index) for index in range(template_len)]
    sd_names = ['SD_' + str(index) for index in range(template_len)]

    wavelet = 'db3'

    # Length of one DWT coefficient array (approximation or detail).
    wl_len = int(np.floor((template_len + pywt.Wavelet(wavelet).dec_len - 1) / 2))

    wl_mean_names = ['WLMN_' + str(index) for index in range(2*wl_len)]
    wl_median_names = ['WLMD_' + str(index) for index in range(2*wl_len)]
    wl_perc5_names = ['WLP5_' + str(index) for index in range(2*wl_len)]
    wl_perc95_names = ['WLP95_' + str(index) for index in range(2*wl_len)]
    wl_sd_names = ['WLSD_' + str(index) for index in range(2*wl_len)]

    typical_signal_names = mean_names + median_names + perc5_names + perc95_names + sd_names + wl_mean_names + \
                           wl_median_names + wl_perc5_names + wl_perc95_names + wl_sd_names

    names += typical_signal_names

    data = np.empty([dataframe.shape[0], len(names)])

    iteration = 0
    for row_index, row in dataframe.iterrows():
        print(row_index)

        # Retrieve ECG data (only the non-padding part of the row).
        ecg_signal = row[:lengths[iteration] + 1]
        ecg_signal = nk.ecg_clean(ecg_signal, sampling_rate=SAMPLING_RATE)

        # Find R-peaks
        peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=SAMPLING_RATE)

        # R amplitude
        R_amplitudes = ecg_signal[info['ECG_R_Peaks']]

        # Check if the signal is flipped
        # Check if we have enough peaks to retrieve more information
        if len(R_amplitudes) > 4:

            # Consistency fix: the original hard-coded sampling_rate=300
            # here (and in the flipped branch below) while the rest of the
            # function uses SAMPLING_RATE.
            _, waves_peak = nk.ecg_delineate(ecg_signal, info, sampling_rate=SAMPLING_RATE, show=False)

            # Q amplitude

            # remove nan values; missing Q peaks get -inf so they always
            # lose the |R| > |Q| comparison below
            Q_amplitudes = [ecg_signal[peak_index] if str(peak_index) != 'nan' else - np.infty for peak_index in
                            waves_peak['ECG_Q_Peaks']]

            # Heuristic polarity check: if |Q| tends to exceed |R| the
            # recording is assumed to be inverted and is flipped.
            if np.sum([1 if np.abs(rpeak) > np.abs(Q_amplitudes[index]) else -1 for index, rpeak in
                       enumerate(R_amplitudes)]) < 0:
                print("flip", row_index)

                ecg_signal = -ecg_signal

                peaks, info = nk.ecg_peaks(ecg_signal, sampling_rate=SAMPLING_RATE)

                # R amplitude
                R_amplitudes = ecg_signal[info['ECG_R_Peaks']]

                if len(R_amplitudes) > 4:
                    _, waves_peak = nk.ecg_delineate(ecg_signal, info, sampling_rate=SAMPLING_RATE, show=False)

        data_temp = []
        if len(R_amplitudes) > 0:
            data_temp = [np.mean(R_amplitudes),
                         np.median(R_amplitudes),
                         np.percentile(R_amplitudes, q=5),
                         np.percentile(R_amplitudes, q=95),
                         np.std(R_amplitudes),
                         len(R_amplitudes)]
        else:
            empty = np.empty([6])
            empty[:] = np.NaN
            data_temp += empty.tolist()

        # length of signal
        # NOTE(review): these statistics are taken over a scalar, so mean/
        # median/percentiles all equal the duration and sd is 0 — kept as-is.
        data_new = [np.mean(lengths[iteration] / SAMPLING_RATE),
                    np.median(lengths[iteration] / SAMPLING_RATE),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=5),
                    np.percentile(lengths[iteration] / SAMPLING_RATE, q=95),
                    np.std(lengths[iteration] / SAMPLING_RATE)]

        data_temp += data_new

        # signal
        data_new = [np.mean(ecg_signal),
                    np.median(ecg_signal),
                    np.percentile(ecg_signal, q=5),
                    np.percentile(ecg_signal, q=95),
                    np.std(ecg_signal)]

        data_temp += data_new

        # Check if we have enough peaks to retrieve more information
        if len(R_amplitudes) > 4:

            quality = nk.ecg_quality(ecg_signal, sampling_rate=SAMPLING_RATE)
            data_new = [np.mean(quality),
                        np.median(quality),
                        np.percentile(quality, q=5),
                        np.percentile(quality, q=95),
                        np.std(quality)]

            data_temp += data_new

            # Delineate the ECG signal
            # "ECG_P_Peaks", "ECG_Q_Peaks", "ECG_S_Peaks", "ECG_T_Peaks", "ECG_P_Onsets", "ECG_T_Offsets"

            # Q amplitude

            # remove nan values
            Q_peaks = [peak for peak in waves_peak['ECG_Q_Peaks'] if str(peak) != 'nan']

            if len(Q_peaks) > 0:
                Q_amplitudes = ecg_signal[Q_peaks]

                data_new = [np.mean(Q_amplitudes),
                            np.median(Q_amplitudes),
                            np.percentile(Q_amplitudes, q=5),
                            np.percentile(Q_amplitudes, q=95),
                            np.std(Q_amplitudes),
                            len(Q_amplitudes)]

                data_temp += data_new
            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0  # peak count is a real 0, not missing
                data_temp += empty.tolist()

            # more than 1 Q-Peak => can build interval[s]
            if len(Q_peaks) > 1:
                Q_peaks_diff = [(Q_peaks[index + 1] - Q_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(Q_peaks[:len(Q_peaks) - 1])]

                # QQ interval

                data_new = [np.mean(Q_peaks_diff),
                            np.median(Q_peaks_diff),
                            np.percentile(Q_peaks_diff, q=5),
                            np.percentile(Q_peaks_diff, q=95),
                            np.std(Q_peaks_diff)]

                data_temp += data_new

            # 0 or 1 Q-peak = no interval => return nan
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # S amplitude

            # remove nan values
            S_peaks = [peak for peak in waves_peak['ECG_S_Peaks'] if str(peak) != 'nan']

            if len(S_peaks) > 0:
                S_amplitudes = ecg_signal[S_peaks]

                data_new = [np.mean(S_amplitudes),
                            np.median(S_amplitudes),
                            np.percentile(S_amplitudes, q=5),
                            np.percentile(S_amplitudes, q=95),
                            np.std(S_amplitudes),
                            len(S_amplitudes)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()

            # more than one S-peak
            if len(S_peaks) > 1:
                S_peaks_diff = [(S_peaks[index + 1] - S_peaks[index]) / SAMPLING_RATE
                                for index, item in enumerate(S_peaks[:len(S_peaks) - 1])]

                # SS interval

                data_new = [np.mean(S_peaks_diff),
                            np.median(S_peaks_diff),
                            np.percentile(S_peaks_diff, q=5),
                            np.percentile(S_peaks_diff, q=95),
                            np.std(S_peaks_diff)]

                data_temp += data_new

            # 0 or 1 S-peak = no interval => return nan
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            P_peaks = [peak for peak in waves_peak['ECG_P_Peaks'] if str(peak) != 'nan']

            if len(P_peaks) > 0:
                P_amplitudes = ecg_signal[P_peaks]

                data_new = [np.mean(P_amplitudes),
                            np.median(P_amplitudes),
                            np.percentile(P_amplitudes, q=5),
                            np.percentile(P_amplitudes, q=95),
                            np.std(P_amplitudes),
                            len(P_amplitudes)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()

            T_peaks = [peak for peak in waves_peak['ECG_T_Peaks'] if str(peak) != 'nan']

            if len(T_peaks) > 0:
                T_peaks = ecg_signal[T_peaks]

                data_new = [np.mean(T_peaks),
                            np.median(T_peaks),
                            np.percentile(T_peaks, q=5),
                            np.percentile(T_peaks, q=95),
                            np.std(T_peaks),
                            len(T_peaks)]

                data_temp += data_new

            else:
                empty = np.empty([6])
                empty[:] = np.NaN
                empty[5] = 0
                data_temp += empty.tolist()


            # QRS interval

            QRS_peaks_diff = []

            # compute difference between Q and S peak
            for index in range(len(waves_peak['ECG_Q_Peaks'])):
                if not (np.isnan(waves_peak['ECG_Q_Peaks'][index]) or np.isnan(waves_peak['ECG_S_Peaks'][index])):
                    QRS_peaks_diff.append(
                        (waves_peak['ECG_S_Peaks'][index] - waves_peak['ECG_Q_Peaks'][index]) / SAMPLING_RATE)

            if len(QRS_peaks_diff) > 0:
                data_new = [np.mean(QRS_peaks_diff),
                            np.median(QRS_peaks_diff),
                            np.percentile(QRS_peaks_diff, q=5),
                            np.percentile(QRS_peaks_diff, q=95),
                            np.std(QRS_peaks_diff)]

                data_temp += data_new

            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # PR interval

            PR_peaks_diff = []

            # compute difference between P and R peak
            # NOTE(review): pairs P peaks with R peaks by index — assumes
            # both lists are aligned beat-for-beat; confirm.
            for index in range(len(waves_peak['ECG_P_Peaks'])):
                if not np.isnan(waves_peak['ECG_P_Peaks'][index]):
                    PR_peaks_diff.append(
                        (info['ECG_R_Peaks'][index] - waves_peak['ECG_P_Peaks'][index]) / SAMPLING_RATE)

            if len(PR_peaks_diff) > 0:
                data_new = [np.mean(PR_peaks_diff),
                            np.median(PR_peaks_diff),
                            np.percentile(PR_peaks_diff, q=5),
                            np.percentile(PR_peaks_diff, q=95),
                            np.std(PR_peaks_diff)]

                data_temp += data_new
            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # RT interval

            RT_peaks_diff = []

            # compute difference between R and T peak
            for index in range(len(waves_peak['ECG_T_Peaks'])):
                if not np.isnan(waves_peak['ECG_T_Peaks'][index]):
                    RT_peaks_diff.append(
                        (waves_peak['ECG_T_Peaks'][index] - info['ECG_R_Peaks'][index]) / SAMPLING_RATE)

            if len(RT_peaks_diff) > 0:
                # Bug fix: the original computed the median of
                # PR_peaks_diff here (copy-paste), not RT_peaks_diff.
                data_new = [np.mean(RT_peaks_diff),
                            np.median(RT_peaks_diff),
                            np.percentile(RT_peaks_diff, q=5),
                            np.percentile(RT_peaks_diff, q=95),
                            np.std(RT_peaks_diff)]

                data_temp += data_new

            else:
                empty = np.empty([5])
                empty[:] = np.NaN
                data_temp += empty.tolist()

            # HRV indices
            # explanation of features:
            # https://neurokit2.readthedocs.io/en/latest/functions.html?highlight=hrv%20time#neurokit2.hrv.hrv_time
            # NOTE(review): despite the variable name this calls nk.hrv
            # (all domains), not nk.hrv_time.

            hrv_time = nk.hrv(peaks, sampling_rate=SAMPLING_RATE, show=False)

            data_new = hrv_time.values.tolist()[0]

            data_temp += data_new

            # Frequency-band energies of the interpolated RR series.
            # http://www.paulvangent.com/2016/03/21/analyzing-a-discrete-heart-rate-signal-using-python-part-2/
            rpeaks = info['ECG_R_Peaks']
            r_interval = [rpeaks[index+1]-rpeaks[index] for index in range(len(rpeaks)-1)]
            RR_x_new = np.linspace(rpeaks[0],rpeaks[-2],rpeaks[-2])
            f = interp1d(rpeaks[:-1], r_interval, kind='cubic')

            n = lengths[iteration] + 1 # Length of the signal
            frq = np.fft.fftfreq(n, d=(1 / SAMPLING_RATE)) # divide the bins into frequency categories
            frq = frq[range(int(n/2))] # Get single side of the frequency range

            Y = np.fft.fft(f(RR_x_new))/n # Calculate FFT

            try:
                Y = Y[range(int(n / 2))]
                lf = np.trapz(abs(Y[(frq >= 0.04) & (frq <= 0.15)]))  # 0.04-0.15Hz (LF)

                hf = np.trapz(abs(Y[(frq >= 0.16) & (frq <= 0.5)]))  # Do the same for 0.16-0.5Hz (HF)

                data_new = [lf, hf, lf / hf]

                data_temp += data_new
            except IndexError as err:
                print(err)
                # Bug fix: the original appended [None, None, None], which
                # cannot be assigned into the float `data` array below —
                # use NaN like every other missing value.
                data_temp += [np.NaN, np.NaN, np.NaN]

        # if we don't have enough R peaks return vector of nan's
        else:
            # 16 = the R-amplitude (6) + length (5) + signal (5) stats
            # already appended above.
            empty = np.empty([len(names) - 16 - len(typical_signal_names)])
            empty[:] = np.NaN
            data_temp += empty.tolist()

        # Create a 'typical' heartbeat

        out = ecg.ecg(signal=ecg_signal, sampling_rate=SAMPLING_RATE, show=False)

        mean = np.mean(out['templates'], axis=0)
        median = np.median(out['templates'], axis=0)
        perc5 = np.percentile(out['templates'].astype(np.float64), axis=0, q=5)
        perc95 = np.percentile(out['templates'].astype(np.float64), axis=0, q=95)
        std = np.std(out['templates'], axis=0)

        data_new = np.concatenate((mean, median, perc5, perc95, std)).tolist()

        data_temp += data_new

        # Single-level DWT of each template statistic (approximation +
        # detail coefficients, concatenated below).
        (wl_mean_cA, wl_mean_cD) = pywt.dwt(np.mean(out['templates'], axis=0),
                                            'db3', 'periodic')
        (wl_median_cA, wl_median_cD) = pywt.dwt(np.median(out['templates'], axis=0),
                                                'db3', 'periodic')
        (wl_perc5_cA, wl_perc5_cD) = pywt.dwt(np.percentile(out['templates'].astype(np.float64), axis=0, q=5),
                                              'db3', 'periodic')
        (wl_perc95_cA, wl_perc95_cD) = pywt.dwt(np.percentile(out['templates'].astype(np.float64), axis=0, q=95),
                                                'db3', 'periodic')
        (wl_sd_cA, wl_sd_cD) = pywt.dwt(np.std(out['templates'].astype(np.float64), axis=0),
                                        'db3', 'periodic')

        data_new = np.concatenate((wl_mean_cA, wl_mean_cD,
                                   wl_median_cA, wl_median_cD,
                                   wl_perc5_cA, wl_perc5_cD,
                                   wl_perc95_cA, wl_perc95_cD,
                                   wl_sd_cA, wl_sd_cD)).tolist()

        data_temp += data_new

        data[iteration] = data_temp

        iteration += 1

    features = pd.DataFrame(data, columns=names)

    return features
fig = plt.gcf()
fig.set_size_inches(10, 6)
# Bug fix: `h_pad` is a `tight_layout()` argument, not a valid
# `Figure.savefig` keyword — modern matplotlib rejects unknown kwargs.
fig.savefig("README_signalprocessing.png", dpi=300)

# =============================================================================
# Heart Rate Variability
# =============================================================================

# Download data
data = nk.data("bio_resting_8min_100hz")

# Find peaks
peaks, info = nk.ecg_peaks(data["ECG"], sampling_rate=100)

# Compute HRV indices (show=True also produces the plot saved below)
hrv = nk.hrv(peaks, sampling_rate=100, show=True)
hrv

# Save plot
fig = plt.gcf()
fig.set_size_inches(10 * 1.5, 6 * 1.5, forward=True)
fig.savefig("README_hrv.png", dpi=300)

# =============================================================================
# ECG Delineation
# =============================================================================

# Download data
ecg_signal = nk.data(dataset="ecg_3000hz")['ECG']

# Extract R-peaks locations
Example #8
0
def extract_features(ecg_df, save=True, mode='train'):  ## feature amount = 42
    '''
    Extract per-recording ECG features (R/P/T peak statistics, signal
    power, HRV indices) from a dataframe of raw recordings.

    input: raw X_train_df (one NaN-padded recording per row)
    save: when True, write the feature table to ./features/
    mode: tag used in the output file name ('train' / 'test')
    returns: the feature DataFrame
    '''

    # NOTE(review): this list is never used below — dead code, kept for now.
    features_names_timehvr = [
        'sdNN', 'meanNN', 'CVSD', 'cvNN', 'RMSSD', 'medianNN', 'madNN',
        'mcvNN', 'pNN50', 'pNN20'
    ]
    additional_peaks = [
        "ECG_P_Peaks", "ECG_T_Peaks", "ECG_P_Onsets", "ECG_P_Offsets",
        "ECG_T_Onsets", "ECG_T_Offsets", "ECG_R_Onsets", "ECG_R_Offsets"
    ]

    # MAIN FEATURES, INITIALIZING #
    print('-- EXTRACTING MAIN FEATURES --')
    values = ecg_df.apply(
        lambda x: ecg.ecg(x.dropna(), sampling_rate=300, show=False), axis=1)
    features_df = pd.DataFrame({
        'rpeaks':
        values.apply(lambda x: x['rpeaks']),
        'filtered':
        values.apply(lambda x: x['filtered']),
        'templates':
        values.apply(lambda x: x['templates'])
    })
    print('-- VALUES EXTRACTION DONE --')
    values_nk2 = ecg_df.apply(
        lambda x: ecg_process_custom(x.dropna(), sampling_rate=300), axis=1)
    values_nk2 = values_nk2.apply(lambda x: x[0] if x == x else np.nan)
    # Bug fix: the original loop iterated `for v in additional_peaks` but
    # referenced an undefined `k` inside the body (NameError on first use).
    for k in additional_peaks:
        # NOTE(review): k[0] is 'E' for every "ECG_*" key, so the progress
        # message prints 'E' each time; likely meant a more descriptive
        # name — confirm intended naming.
        peak_name = k[0]
        features_df[k] = values_nk2.apply(
            lambda x: np.array(x[k]) if type(x) == pd.core.frame.DataFrame else
            np.nan)
        features_df[k] = features_df[k].apply(
            lambda x: np.where(x == 1)[0] if type(x) == np.ndarray else np.nan)
        print('----->' + peak_name + ' done.')
    print('-- VALUES_NK2 EXTRACTION DONE --')

    # R PEAK FEATURES
    features_df['R_peaks'] = features_df.apply(
        lambda x: x['filtered'][x['rpeaks']], axis=1)

    features_df['mean_rvalues'] = features_df.apply(
        lambda x: np.mean(x['R_peaks']), axis=1)
    features_df['min_rvalues'] = features_df.apply(
        lambda x: np.min(x['R_peaks']), axis=1)
    features_df['max_rvalues'] = features_df.apply(
        lambda x: np.max(x['R_peaks']), axis=1)
    features_df['std_rvalues'] = features_df.apply(
        lambda x: np.std(x['R_peaks']), axis=1)
    features_df['median_rvalues'] = features_df.apply(
        lambda x: np.median(x['R_peaks']), axis=1)
    print('-- R PEEK EXTRACTION DONE --')

    # ADDITIONAL PEAK FEATURES
    for k in additional_peaks:
        features_df[k] = features_df.apply(
            lambda x: x['filtered'][x[k]]
            if type(x[k]) == np.ndarray else np.nan,
            axis=1)

        # NOTE(review): k[0] is 'E' for all eight keys, so every derived
        # column ('mean_Evalues', ...) is overwritten on each iteration —
        # only the last peak type's statistics survive. Latent naming bug.
        peak_name = k[0]
        features_df['mean_' + peak_name + 'values'] = features_df.apply(
            lambda x: np.mean(x[k])
            if (type(x[k]) == np.ndarray and len(x[k]) != 0) else np.nan,
            axis=1)
        features_df['min_' + peak_name + 'values'] = features_df.apply(
            lambda x: np.min(x[k])
            if (type(x[k]) == np.ndarray and len(x[k]) != 0) else np.nan,
            axis=1)
        features_df['max_' + peak_name + 'values'] = features_df.apply(
            lambda x: np.max(x[k])
            if (type(x[k]) == np.ndarray and len(x[k]) != 0) else np.nan,
            axis=1)
        features_df['std_' + peak_name + 'values'] = features_df.apply(
            lambda x: np.std(x[k])
            if (type(x[k]) == np.ndarray and len(x[k]) != 0) else np.nan,
            axis=1)
        features_df['median_' + peak_name + 'values'] = features_df.apply(
            lambda x: np.median(x[k])
            if (type(x[k]) == np.ndarray and len(x[k]) != 0) else np.nan,
            axis=1)
        print('----->' + peak_name + ' done.')
    print('-- OTHER PEEKS EXTRACTION DONE --')

    # POWER: mean squared amplitude of the filtered signal
    features_df['power'] = features_df['filtered'].apply(
        lambda x: np.sum(np.square(x)) / x.shape[0])

    # HRV FEATURES
    features_names_hvr = [
        'HRV_RMSSD', 'HRV_MeanNN', 'HRV_SDNN', 'HRV_SDSD', 'HRV_CVNN',
        'HRV_CVSD', 'HRV_MedianNN', 'HRV_MadNN', 'HRV_MCVNN', 'HRV_IQRNN',
        'HRV_pNN50', 'HRV_pNN20', 'HRV_TINN', 'HRV_HTI', 'HRV_ULF', 'HRV_VLF',
        'HRV_LF', 'HRV_HF', 'HRV_VHF', 'HRV_LFHF', 'HRV_LFn', 'HRV_HFn',
        'HRV_LnHF', 'HRV_SD1', 'HRV_SD2', 'HRV_SD1SD2', 'HRV_S', 'HRV_CSI',
        'HRV_CVI', 'HRV_CSI_Modified', 'HRV_PIP', 'HRV_IALS', 'HRV_PSS',
        'HRV_PAS', 'HRV_GI', 'HRV_SI', 'HRV_AI', 'HRV_PI', 'HRV_C1d',
        'HRV_C1a', 'HRV_SD1d', 'HRV_SD1a', 'HRV_C2d', 'HRV_C2a', 'HRV_SD2d',
        'HRV_SD2a', 'HRV_Cd', 'HRV_Ca', 'HRV_SDNNd', 'HRV_SDNNa', 'HRV_ApEn',
        'HRV_SampEn'
    ]
    # NOTE(review): features_df has no 'info' column — values_nk2 was
    # reduced to its first element above, so this lookup raises KeyError;
    # the peaks/info dict from ecg_process_custom must be retained for
    # this to work. Confirm against ecg_process_custom's return value.
    features_df['hrv_features'] = features_df.apply(
        lambda x: nk2.hrv(peaks=x['info'], sampling_rate=300), axis=1)
    for name in features_names_hvr:
        features_df[name] = features_df['hrv_features'].apply(
            lambda x: x[name])
    print('-- HRV EXTRACTION DONE --')

    # FINALIZE / SAVE: drop the intermediate array-valued columns
    features_df = features_df.drop(
        ['rpeaks', 'filtered', 'templates', 'R_peaks', 'hrv_features'], axis=1)
    features_df = features_df.drop(list(additional_peaks), axis=1)
    numberOfFeatures = len(features_df.columns)

    if save:
        features_df.to_csv('./features/features_' + mode + '_' +
                           str(numberOfFeatures) + '.csv',
                           index=False)
        print('-- FEATURES SAVED --')

    return features_df
Example #9
0
import csv

# Load the combined ECG recordings (one recording per row, no header).
data = pd.read_csv('/content/drive/My Drive/Anxiety Spider Dataset/ECG/ECG_Combined.csv',header=None)

# NOTE(review): missing parentheses — `data.head` references the method
# without calling it, so nothing is evaluated here (notebook residue).
data.head

len(data)

# Inspect one example recording.
ecg=data.iloc[256]
nk.signal_plot(ecg)

signals, info = nk.ecg_process(ecg,sampling_rate=100)
signals

# R-peak detection and HRV indices for the example recording.
peaks, info = nk.ecg_peaks(ecg, sampling_rate=100)
nk.hrv(peaks, sampling_rate=100, show=True)

ecg_features=nk.hrv(peaks, sampling_rate=100)

# X=ecg_features[["HRV_RMSSD","HRV_MeanNN","HRV_SDNN", "HRV_SDSD", "HRV_CVNN", "HRV_CVSD", "HRV_MedianNN", "HRV_MadNN", "HRV_MCVNN", "HRV_IQRNN", "HRV_pNN50", "HRV_pNN20"]]
# X

# Extract the selected HRV features for every recording in the dataset.
data_features=[]
for i in range(0,len(data)):
  ecg=data.iloc[i]
  peaks, info = nk.ecg_peaks(ecg, sampling_rate=100)
  ecg_features=nk.hrv(peaks, sampling_rate=100)
  X=ecg_features[["HRV_RMSSD","HRV_MeanNN","HRV_SDNN", "HRV_SDSD", "HRV_CVNN", "HRV_CVSD", "HRV_MedianNN", "HRV_MadNN", "HRV_MCVNN", "HRV_IQRNN", "HRV_pNN50", "HRV_pNN20", "HRV_TINN",	"HRV_HTI",	"HRV_ULF",	"HRV_VLF",	"HRV_LF",	"HRV_HF",	"HRV_VHF",	"HRV_LFHF", "HRV_LFn",	"HRV_HFn",	"HRV_LnHF",	"HRV_SD1",	"HRV_SD2",	"HRV_SD1SD2",	"HRV_S",	"HRV_CSI",	"HRV_CVI", "HRV_CSI_Modified", "HRV_PIP",	"HRV_IALS",	"HRV_PSS",	"HRV_PAS",	"HRV_GI",	"HRV_SI",	"HRV_AI",	"HRV_PI",	"HRV_C1d",	"HRV_C1a",	"HRV_SD1d", "HRV_SD1a",	"HRV_C2d",	"HRV_C2a",	"HRV_SD2d",	"HRV_SD2a",	"HRV_Cd",	"HRV_Ca",	"HRV_SDNNd",	"HRV_SDNNa",	"HRV_ApEn",	"HRV_SampEn"]]
  data_features.append(X)

# NOTE(review): the body of this `with` block is truncated in this view.
with open('output.csv', 'w', newline='') as file:
Example #10
0
def extract_bvp_features(bvp_data, sampling_rate):
    """Extract heart-rate, RR-interval, and HRV features from a BVP/PPG signal.

    Returns a fixed-length float vector of 27 features combining the WESAD
    feature set (Schmidt et al., ICMI '18) and the additions of
    Nkurikiyeyezu et al. (2019).
    """
    # Process the raw signal: instantaneous heart rate + systolic peaks.
    ppg_signals, info = nk.ppg_process(bvp_data, sampling_rate=sampling_rate)
    heart_rate = ppg_signals['PPG_Rate']
    peaks = info['PPG_Peaks']

    # Sanitize input; a tuple result carries the detected sampling rate.
    peaks = _hrv_sanitize_input(peaks)
    if isinstance(peaks, tuple):
        peaks, sampling_rate = peaks[0], peaks[1]

    # RR intervals and their successive differences.
    rri = _hrv_get_rri(peaks, sampling_rate=sampling_rate, interpolate=False)
    rri_diff = np.diff(rri)

    # Full HRV index table. (A "recording too short" NeuroKitWarning is
    # expected here; the frequency features below use custom bands.)
    hrv_features = nk.hrv(peaks, sampling_rate=sampling_rate)
    # Frequency-domain indices with the band definitions from the
    # original WESAD paper.
    hrv_frequency = nk.hrv_frequency(
        peaks,
        sampling_rate=sampling_rate,
        ulf=(0.01, 0.04),
        lf=(0.04, 0.15),
        hf=(0.15, 0.4),
    )

    # --- WESAD features (Schmidt et al., 2018) ---
    # Not including: f_x_HRV of ULF and HLF, rel_f_x, sum f_x_HRV
    mean_HR = np.mean(heart_rate)
    std_HR = np.std(heart_rate)
    mean_HRV = hrv_features['HRV_MeanNN']
    std_HRV = hrv_features['HRV_SDNN']
    HRV_ULF = hrv_frequency['HRV_ULF']
    HRV_LF = hrv_frequency['HRV_LF']
    HRV_HF = hrv_frequency['HRV_HF']
    HRV_LFHF = hrv_frequency['HRV_LFHF']
    HRV_LFnorm = hrv_frequency['HRV_LFn']
    HRV_HFnorm = hrv_frequency['HRV_HFn']
    rms = np.sqrt(np.nanmean(rri ** 2))
    nn50 = np.sum(np.abs(rri_diff) > 50)
    HRV_TINN = hrv_features['HRV_TINN']
    HRV_pNN50 = hrv_features['HRV_pNN50']
    HRV_RMSSD = hrv_features['HRV_RMSSD']

    # --- Additions from Nkurikiyeyezu et al. (2019) ---
    kurtosis_HRV = kurtosis(rri)
    skewness_HRV = skew(rri)
    HRV_VLF = hrv_frequency['HRV_VLF']
    HRV_SD1 = hrv_features['HRV_SD1']
    HRV_SD2 = hrv_features['HRV_SD2']
    HRV_SDSD = hrv_features['HRV_SDSD']
    HRV_SDSD_RMSSD = HRV_SDSD / HRV_RMSSD
    HRV_pNN25 = np.sum(np.abs(rri_diff) > 25) / len(rri) * 100

    # Relative RR intervals: successive difference over the adjacent sum.
    adjacent_sums = rri_diff + 2 * rri[:-1]
    relative_rri = 2 * rri_diff / adjacent_sums
    mean_relative = np.mean(relative_rri)
    median_relative = np.median(relative_rri)
    std_relative = np.std(relative_rri)
    rmssd_relative = np.sqrt(np.mean(np.diff(relative_rri) ** 2))
    kurtosis_relative = kurtosis(relative_rri)
    skew_relative = skew(relative_rri)

    # Combine everything in the canonical order.
    feature_values = [
        mean_HR, std_HR, mean_HRV, std_HRV, kurtosis_HRV, skewness_HRV, rms,
        nn50, HRV_pNN50, HRV_pNN25, HRV_TINN, HRV_RMSSD, HRV_LF, HRV_HF,
        HRV_LFHF, HRV_LFnorm, HRV_HFnorm, HRV_SD1, HRV_SD2, HRV_SDSD,
        HRV_SDSD_RMSSD, mean_relative, median_relative, std_relative,
        rmssd_relative, kurtosis_relative, skew_relative
    ]
    return np.array([float(value) for value in feature_values])
Example #11
0
def my_hrv(peaks, sampling_rate=300):
    """Compute HRV indices, returning NaN instead of raising on failure.

    :param peaks: peak locations (any format accepted by ``hrv``)
    :param sampling_rate: sampling rate of the signal in Hz (default 300)
    :return: DataFrame of HRV indices, or ``np.NaN`` if computation fails
    """
    try:
        return hrv(peaks=peaks, sampling_rate=sampling_rate)
    except Exception:
        # Bug fix: a bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; catching Exception lets those propagate normally.
        return np.NaN