예제 #1
0
def missing_imputation(
    tm: TensorMap,
    hd5: h5py.File,
    visit: str,
    indices: List[int],
    period: str,
    tensor: np.ndarray,
    imputation_type: str = None,
    **kwargs,
):
    """
    Replace a missing feature value according to <imputation_type>.

    :param tm: <TensorMap> Tensor map the feature was computed from.
    :param hd5: <h5py.File> File handed to ``tm.tensor_from_file``.
    :param visit: <str> Visit forwarded to ``tm.tensor_from_file``.
    :param indices: <List[int]> Sample positions of the window the feature
                    was computed on.
    :param period: <str> With "pre" the last valid sample located before the
                   end of the window is held; with any other value the first
                   valid sample located from the start of the window onwards
                   is held.
    :param tensor: <np.ndarray> Feature value(s) to impute. Scalars are
                   accepted too.
    :param imputation_type: <str> ``None`` (or empty) returns <tensor>
                            untouched. "sample_and_hold" only acts when
                            <tensor> is empty or entirely NaN. Any other
                            value is used as a key into
                            ``ICU_TMAPS_METADATA[name]`` and the value found
                            there replaces the NaNs.
    :return: The imputed tensor. When "sample_and_hold" acts, the result is a
             one-element array (NaN if no valid sample could be found).
    """
    if not imputation_type:
        return tensor

    # len() raises TypeError on scalars (np.float64, int); a scalar is never
    # considered empty.
    is_empty = np.ndim(tensor) > 0 and len(tensor) == 0

    if imputation_type == "sample_and_hold":
        if not is_empty and not np.isnan(tensor).all():
            return tensor
        if len(indices) == 0:
            # Without a window there is no anchor to look backwards or
            # forwards from, so the value stays missing.
            return np.array([np.nan])

        signal = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0]
        if tm.name.endswith("_timeseries"):
            # Timeseries maps are indexed elsewhere in this file as
            # [0, indices] for values and [1, indices] for times; only the
            # value row may be held.
            signal = signal[0]

        if period == "pre":
            candidates, pick = signal[:indices[-1]], -1
        else:
            candidates, pick = signal[indices[0]:], 0
        candidates = candidates[~np.isnan(candidates)]
        if candidates.size == 0:
            return np.array([np.nan])
        return np.array([candidates[pick]])

    # Metadata-driven imputation: the tensor map name carries the imputation
    # type as a suffix that has to be removed to find the metadata entry.
    name = tm.name.replace(f"_{imputation_type}", "")
    fill_value = ICU_TMAPS_METADATA[name][imputation_type]
    if is_empty:
        return np.array([fill_value])
    # NOTE: nan_to_num also replaces +/-inf with large finite numbers.
    return np.nan_to_num(tensor, nan=fill_value)
예제 #2
0
def get_sliding_windows(
    hd5,
    window: int,
    step: int,
    event_tm_1: TensorMap,
    event_tm_2: TensorMap,
    visit_tm: TensorMap,
    buffer_adm_time: int = 24,
    **kwargs,
):
    """
    Create a sliding window from the time associated to <event_tm_1> to <event_tm_2>
    with step size <step> and window length <window>.

    The first window ends <buffer_adm_time> + <window> hours after the first
    event; following windows are spaced <step> hours apart and stop before
    the second event.

    Results are memoised per file in ``get_sliding_windows.windows_cache``,
    keyed by ``hd5.id``.
    NOTE(review): the cache key ignores <window>, <step>, <buffer_adm_time>
    and the tensor maps, so a second call with different parameters on the
    same file returns the first result.

    :param hd5: File object; must expose ``id`` and is forwarded to the
                tensor maps.
    :param window: <int> Window length in hours.
    :param step: <int> Distance between consecutive windows in hours.
    :param event_tm_1: <TensorMap> Tensor map giving the start event time.
    :param event_tm_2: <TensorMap> Tensor map giving the end event time.
    :param visit_tm: <TensorMap> Tensor map giving the visit to use.
    :param buffer_adm_time: <int> Hours skipped after the first event.
    :return: <np.ndarray> Window times (seconds, same scale as the events).
    :raises ValueError: If no window fits between the two events. This is
                        raised on cached results as well, so repeated calls
                        behave the same as the first one.
    """
    cache = get_sliding_windows.__dict__.setdefault("windows_cache", {})
    windows = cache.get(hd5.id)

    if windows is None:
        visit = visit_tm.tensor_from_file(visit_tm, hd5, **kwargs)[0]
        start_time = event_tm_1.tensor_from_file(
            event_tm_1,
            hd5,
            visits=visit,
            unix_dates=True,
            **kwargs,
        )[0][0]
        # NOTE(review): unlike the first event, this read does not request
        # unix_dates=True — confirm both times share the same scale.
        end_time = event_tm_2.tensor_from_file(
            event_tm_2,
            hd5,
            visits=visit,
            **kwargs,
        )[0][0]

        seconds_per_hour = 60 * 60
        windows = np.arange(
            start_time + (buffer_adm_time + window) * seconds_per_hour,
            end_time,
            step * seconds_per_hour,
        )
        cache[hd5.id] = windows

    # Validate after the cache lookup so that an empty cached result is
    # rejected on every call, not just on the one that computed it.
    if windows.size == 0:
        raise ValueError(
            "It is not possible to compute a sliding window with the given parameters.",
        )
    return windows
    def extract_features_tmaps(
        self,
        signal_tm: TensorMap,
        clean_method: str = "neurokit",
        r_method: str = "neurokit",
        wave_method: str = "dwt",
        min_peaks: int = 200,
    ):
        """
        Function to extract the ecg features using the neurokit2 package. That
        is the P, Q, R, S and T peaks and the P, QRS and T waves onsets and
        offsets. The result is saved internally, in ``self.r_peaks`` and
        ``self.waves_peaks``, as lists keyed by peak type.

        The signal is processed one segment at a time. Each entry of
        ``self.sampling_rate`` provides the sampling rate of a segment and
        the sample position where it starts; a segment runs up to the start
        of the next one, and the last segment runs to the end of the signal.
        Segments where R-peak detection fails with IndexError or that yield
        fewer than <min_peaks> R peaks are skipped.

        NOTE(review): detected positions are appended as returned by
        neurokit2 for each segment; no segment start offset is added here —
        confirm whether consumers expect segment-relative positions.

        :param signal_tm: <TensorMap>
        :param clean_method: <str> The processing pipeline to apply. Can be one of
                             'neurokit' (default), 'biosppy', 'pantompkins1985',
                             'hamilton2002', 'elgendi2010', 'engzeemod2012'.
        :param r_method: <str> The algorithm to be used for R-peak detection. Can be one
                         of 'neurokit' (default), 'pantompkins1985', 'hamilton2002',
                         'christov2004', 'gamboa2008', 'elgendi2010', 'engzeemod2012'
                         or 'kalidas2017'.
        :param wave_method: <str> Can be one of 'dwt' (default) for discrete
                            wavelet transform or 'cwt' for continuous wavelet transform.
        :param min_peaks: <int> Minimum R peaks to be detected to proceed with
                          further calculations.
        """

        def _accumulate(store, found):
            # Append the newly found positions to those of earlier segments.
            for peak_type, positions in found.items():
                if peak_type in store:
                    store[peak_type] = np.append(store[peak_type], positions)
                else:
                    store[peak_type] = positions

        segments = self.sampling_rate
        last_segment = len(segments) - 1
        full_signal = None

        for i, segment in enumerate(segments):
            sampling_rate, init = segment[0], segment[1]
            # None (not -1) so the final sample of the last segment is kept.
            end = None if i == last_segment else segments[i + 1][1]

            # Read the signal once, on first use, instead of once per segment.
            if full_signal is None:
                full_signal = signal_tm.tensor_from_file(signal_tm, self)[0]
            ecg_signal = nk.ecg_clean(
                full_signal[init:end],
                sampling_rate,
                clean_method,
            )

            try:
                _, r_peaks = nk.ecg_peaks(ecg_signal, sampling_rate, r_method)
            except IndexError:
                # Best effort: a segment where detection fails is skipped.
                continue
            if len(r_peaks["ECG_R_Peaks"]) < min_peaks:
                continue

            # First delineation uses neurokit2's default method, the second
            # the requested <wave_method>; results of the latter take
            # precedence on shared keys.
            _, waves_peaks = nk.ecg_delineate(ecg_signal, r_peaks,
                                              sampling_rate)
            _, waves_peaks_2 = nk.ecg_delineate(
                ecg_signal,
                r_peaks,
                sampling_rate,
                wave_method,
            )
            waves_peaks.update(waves_peaks_2)

            _accumulate(self.r_peaks, r_peaks)
            _accumulate(self.waves_peaks, waves_peaks)

        # Store plain lists rather than numpy arrays.
        for store in (self.r_peaks, self.waves_peaks):
            for peak_type in store:
                store[peak_type] = list(store[peak_type])
예제 #4
0
def _summarize_values(values, feature):
    """Reduce the NaN-free 1-D <values> to the statistic named <feature>."""
    if feature.endswith("_last_values"):
        number_of_samples = int(feature.split("_")[0])
        values = values[-number_of_samples:]
        missing = number_of_samples - values.size
        if missing > 0:
            # Left-pad with NaN so exactly <number_of_samples> values are
            # returned; the padding can then be filled by the imputation.
            values = np.concatenate((np.full(missing, np.nan), values))
        return values if values.size else np.array([np.nan])

    if feature == "count":
        return values.size
    if values.size == 0:
        return np.array([np.nan])
    if feature == "last":
        return values[-1]
    if feature == "first":
        return values[0]
    if feature == "min":
        return np.min(values)
    if feature == "max":
        return np.max(values)
    if feature == "median":
        return np.median(values)
    if feature == "mean":
        return np.mean(values)
    if feature == "std":
        return np.std(values)
    if feature == "mean_crossing_rate":
        # Number of times consecutive samples change side w.r.t. the mean.
        signs = np.sign(values - np.mean(values))
        return np.count_nonzero(np.diff(signs))
    raise KeyError(f"Unable to compute feature {feature}.")


def _sample_time(data, indices, value, feature, period):
    """Return the time (row 1 of <data>) of the sample matching <value>."""
    if feature == "last":
        position = -1
    elif feature == "first":
        position = 0
    else:
        # Closest sample to the feature value. NaN samples are given an
        # infinite distance so argmin cannot land on them.
        distances = np.abs(data[0, indices] - value)
        distances = np.where(np.isnan(distances), np.inf, distances)
        if period == "pre":
            # argmin returns the first match; before an event the match
            # closest to the event (the last one) is wanted, so search the
            # reversed array and map the position back.
            position = len(indices) - 1 - np.flip(distances).argmin()
        else:
            position = distances.argmin()
    return data[1, indices][position]


def compute_feature(
    tm: TensorMap,
    hd5: h5py.File,
    visit: str,
    indices: List[int],
    feature: str,
    period: str,
    imputation_type: str = None,
    **kwargs,
):
    """
    Compute <feature> over the samples of <tm> selected by <indices>.

    Tensor maps whose name ends in "_timeseries" are read as a 2-row array
    (row 0 values, row 1 times); other maps are read as a 1-D array of
    values. NaN samples are ignored by every statistic.

    :param tm: <TensorMap> Tensor map to read the signal from.
    :param hd5: <h5py.File> File handed to ``tm.tensor_from_file``.
    :param visit: <str> Visit forwarded to ``tm.tensor_from_file``.
    :param indices: <List[int]> Sample positions of the window. An empty
                    window gives NaN (before imputation).
    :param feature: <str> One of "raw", "mean_slope" (timeseries only),
                    "mean", "std", "count", "mean_crossing_rate" (values
                    only), "min", "max", "median", "first", "last" or
                    "<N>_last_values" (the last N valid samples, left-padded
                    with NaN).
    :param period: <str> "pre" or anything else; forwarded to
                   ``missing_imputation`` and used to break ties when
                   locating the time of "min", "max" and "median".
    :param imputation_type: <str> Forwarded to ``missing_imputation``.
    :return: <np.ndarray> The feature. For timeseries maps and "min", "max",
             "median", "first" or "last" the result is ``[value, time]``;
             the time is NaN when the value is NaN or the window is empty.
    :raises KeyError: If <feature> is unknown or not available for the kind
                      of tensor map given.
    """
    is_timeseries = tm.name.endswith("_timeseries")
    if is_timeseries and feature in (
            "mean",
            "std",
            "count",
            "mean_crossing_rate",
    ):
        raise KeyError(
            f"To compute {feature} use signal_value, not signal_timeseries.", )
    if not is_timeseries and feature == "mean_slope":
        raise KeyError(
            f"To compute {feature} use signal_timeseries, not signal_value.", )

    # len() rather than truthiness: <indices> may be a numpy array.
    if len(indices) == 0:
        data = None
        tensor = np.array([np.nan])
    else:
        # Read the signal once and reuse it for the value and its time.
        data = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0]
        if feature == "raw":
            tensor = data[:, indices] if is_timeseries else data[indices]
        elif feature == "mean_slope":
            values = data[:, indices]
            # Drop every column holding a NaN in either row.
            values = np.delete(values, np.where(np.isnan(values))[1], 1)
            if values.shape[1] < 2:
                # A slope needs at least two samples.
                tensor = np.array([np.nan])
            else:
                tensor = np.nanmean(np.diff(values[0]) / np.diff(values[1]))
        else:
            values = data[0, indices] if is_timeseries else data[indices]
            tensor = _summarize_values(values[~np.isnan(values)], feature)

    # Scalars are wrapped so the imputation always receives an array.
    tensor = missing_imputation(
        tm=tm,
        hd5=hd5,
        visit=visit,
        indices=indices,
        period=period,
        tensor=np.atleast_1d(tensor),
        imputation_type=imputation_type,
        **kwargs,
    )

    if is_timeseries and feature in (
            "min",
            "max",
            "median",
            "first",
            "last",
    ):
        # Attach the time at which the feature value is found.
        value = np.ravel(tensor)[0]
        if data is None or np.isnan(value):
            # Nothing to locate: missing value or empty window.
            time = np.nan
        else:
            time = _sample_time(data, indices, value, feature, period)
        tensor = np.array([value, time])

    return tensor if isinstance(tensor, np.ndarray) else np.array([tensor])