def __sleep_boundaries_with_annotations(wearable,
                                            output_col,
                                            annotation_col,
                                            hour_to_start_search=18,
                                            merge_tolerance_in_minutes=20,
                                            only_largest_sleep_period=True):

        if annotation_col not in wearable.data.keys():
            raise KeyError("Col %s is not a valid for pid %s" %
                           (annotation_col, wearable.get_pid()))

        saved_hour_start_day = wearable.hour_start_experiment
        wearable.change_start_hour_for_experiment_day(hour_to_start_search)

        wearable.data["hyp_sleep_candidate"] = wearable.data[
            annotation_col].copy()

        # Annotates the sequences of sleep_candidate
        wearable.data["hyp_seq_length"], wearable.data[
            "hyp_seq_id"] = misc.get_consecutive_series(
                wearable.data, "hyp_sleep_candidate")

        wearable.data["hyp_sleep_candidate"], wearable.data[
            "hyp_seq_length"], wearable.data[
                "hyp_seq_id"] = misc.merge_sequences_given_tolerance(
                    wearable.data, wearable.time_col, "hyp_sleep_candidate",
                    merge_tolerance_in_minutes)

        if only_largest_sleep_period:
            grps = wearable.data.groupby(wearable.experiment_day_col)
            tmp_df = []
            for grp_id, grp_df in grps:
                gdf = grp_df.copy()
                gdf["hyp_seq_length"], gdf[
                    "hyp_seq_id"] = misc.get_consecutive_series(
                        gdf, "hyp_sleep_candidate")
                df_out = misc.find_largest_sequence(gdf, "hyp_sleep_candidate",
                                                    output_col).replace(
                                                        -1, False)
                tmp_df.append(df_out)
            wearable.data[output_col] = pd.concat(tmp_df)
        else:
            wearable.data[output_col] = False
            wearable.data.loc[wearable.data["hyp_sleep_candidate"] == 1,
                              output_col] = True

        del wearable.data["hyp_seq_id"]
        del wearable.data["hyp_seq_length"]
        del wearable.data["hyp_sleep_candidate"]

        wearable.change_start_hour_for_experiment_day(saved_hour_start_day)
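
# ---------------------------------------------------------------------------
# Illustrative sketch (not library code): the snippets in this listing lean on
# misc.get_consecutive_series to label runs of identical values. Assuming it
# returns, per row, the length of the run the row belongs to and a 0-based run
# id, a plain-pandas equivalent could look like this. The toy column below is
# hypothetical.
# ---------------------------------------------------------------------------
import pandas as pd

def consecutive_runs_sketch(df: pd.DataFrame, col: str):
    """Return (run_length, run_id) for consecutive equal values in ``col``."""
    run_id = (df[col] != df[col].shift()).cumsum() - 1       # 0-based run id
    run_length = df.groupby(run_id)[col].transform("size")   # size of the run each row belongs to
    return run_length, run_id

toy = pd.DataFrame({"hyp_sleep_candidate": [0, 0, 1, 1, 1, 0, 1]})
toy["hyp_seq_length"], toy["hyp_seq_id"] = consecutive_runs_sketch(toy, "hyp_sleep_candidate")
print(toy)  # run lengths 2,2,3,3,3,1,1 and run ids 0,0,1,1,1,2,3
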
    def __create_threshold_col_based_on_time(df, time_col: str, hr_col: str, start_time: int, end_time: int,
                                             quantile: float, rolling_win_in_minutes: int,
                                             sleep_only_in_sleep_search_window: bool):

        df_time = df.set_index(time_col).copy()

        # This will get the index of everything outside <start_time, end_time>
        idx = df_time.between_time('%02d:00' % end_time,
                                   '%02d:00' % start_time,
                                   include_start=False,
                                   include_end=False).index
        # Set the hr_col to NaN outside the search window so that the quantile computed below ignores those epochs
        df_time.loc[idx, hr_col] = np.nan

        quantiles_per_day = df_time[hr_col].resample('24H', offset="%dh" % start_time).quantile(quantile).dropna()
        df_time["hyp_sleep"] = quantiles_per_day
        if quantiles_per_day.index[0] < df_time.index[0]:
            df_time.loc[df_time.index[0], "hyp_sleep"] = quantiles_per_day.iloc[0]

        # Fill the NaNs so that every epoch carries its day's threshold (forward- then backward-fill)
        df_time["hyp_sleep"] = df_time["hyp_sleep"].fillna(method='ffill').fillna(method='bfill')

        # binarize_by_hr_threshold
        df_time["hyp_sleep_bin"] = np.where((df_time[hr_col] - df_time["hyp_sleep"]) > 0, 0, 1)
        df_time["hyp_sleep_bin"] = df_time["hyp_sleep_bin"].rolling(window=rolling_win_in_minutes).median().fillna(
            method='bfill')

        if sleep_only_in_sleep_search_window:
            # Ignore all sleep candidate periods outside the search window
            df_time.loc[idx, "hyp_sleep_bin"] = 0

        seq_length, seq_id = misc.get_consecutive_series(df_time, "hyp_sleep_bin")

        return df_time["hyp_sleep"].values, df_time["hyp_sleep_bin"].values, seq_length.values, seq_id.values
    def calculate_awakening(df_in: pd.DataFrame, sleep_wake_col: str, ignore_awakenings_smaller_than_X_epochs: int = 0,
                            normalize_per_hour: bool = False, epochs_in_hour: int = 0) -> int:
        """
        This method calculates the number of awakenings (changes from sleep to wake stage).
        It uses the ``sleep_wake_col`` for that, which is the result of any Sleep/Wake algorithm (see SleepWakeAnalysis).
        The parameter ``ignore_awakenings_smaller_than_X_epochs`` is used to avoid counting brief signal fluctuations from sleep to wake as awakenings.


        :param df_in: (partial) dataset to be analysed.
        :param sleep_wake_col: Dataframe column for a Sleep/Wake algorithm. Sleep = 1, Wake = 0.
        :param ignore_awakenings_smaller_than_X_epochs: Ignores changes from sleep to wake if they are smaller than X epochs.
        :param normalize_per_hour: whether the result should be normalized per hour of sleep.
        :param epochs_in_hour: if ``normalize_per_hour`` is True, this parameter is used in the normalization.
        :return: Number of awakenings in df_in[sleep_wake_col] (normalized per hour if ``normalize_per_hour`` is True).
        """
        df = df_in.copy()

        df["consecutive_state"], df["gids"] = misc.get_consecutive_series(df, sleep_wake_col)
        # We ignore the first group of awakenings, as this method only counts the wake
        # sequences that occur after the subject has slept for the first time.
        if df[(df["gids"] == 0) & (df[sleep_wake_col] == 0)].shape[0] > 0:
            df = df[(df["gids"] > 0)]

        grps = df[(df[sleep_wake_col] == 0) & (df["consecutive_state"] > ignore_awakenings_smaller_than_X_epochs)].groupby("gids")
        del df["consecutive_state"]
        del df["gids"]

        if normalize_per_hour:
            total_hours_slept = df.shape[0] / epochs_in_hour
            return len(grps) / total_hours_slept
        else:
            return len(grps)
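
# ---------------------------------------------------------------------------
# Toy illustration of the awakening count above (not library code): with
# sleep = 1 and wake = 0, an awakening is a run of wake epochs that occurs
# after sleep has started and is longer than the tolerated number of epochs.
# The column name, toy data and tolerance of 1 epoch are assumptions.
# ---------------------------------------------------------------------------
import pandas as pd

toy = pd.DataFrame({"sleep": [0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1]})
runs = (toy["sleep"] != toy["sleep"].shift()).cumsum()
run_len = toy.groupby(runs)["sleep"].transform("size")

# Drop the leading wake run (before the first sleep epoch), then count wake
# runs longer than the tolerance (here: awakenings of a single epoch are ignored).
first_sleep_run = runs[toy["sleep"] == 1].iloc[0]
mask = (runs >= first_sleep_run) & (toy["sleep"] == 0) & (run_len > 1)
awakenings = toy[mask].groupby(runs).ngroups
print(awakenings)  # -> 1 (the three-epoch wake block; the single-epoch wake is ignored)
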
    def calculate_sleep_efficiency(df_in, sleep_wake_col: str, ignore_awakenings_smaller_than_X_epochs: int = 0) -> float:
        """
        This method calculates the sleep efficiency from an input dataset.
        The sleep efficiency is calculated on the ``sleep_wake_col``, which is the result of any Sleep/Wake algorithm (see SleepWakeAnalysis).
        The parameter ``ignore_awakenings_smaller_than_X_epochs`` is used to avoid counting brief signal fluctuations from sleep to wake as awakenings.

        :param df_in: (partial) dataset to be analysed.
        :param sleep_wake_col: Dataframe column for a Sleep/Wake algorithm. Sleep = 1, Wake = 0.
        :param ignore_awakenings_smaller_than_X_epochs: Ignores changes from sleep to wake if they are smaller than X epochs.
        :return: sleep efficiency from 0 to 100 (the higher, the better the sleep quality)
        """

        if ignore_awakenings_smaller_than_X_epochs == 0:
            return 100. * (df_in[sleep_wake_col].sum() / df_in.shape[0]) if df_in.shape[0] > 0 else 0

        else:
            # Avoid modifying the original values in the wake col
            df = df_in[[sleep_wake_col]].copy()
            df["consecutive_state"], _ = misc.get_consecutive_series(df, sleep_wake_col)

            # If a wake (= 0) run is shorter than or equal to X epochs, convert it to sleep (1):
            df.loc[(df[sleep_wake_col] == 0) & (
                    df["consecutive_state"] <= ignore_awakenings_smaller_than_X_epochs), sleep_wake_col] = 1
            sleep_quality = 100. * (df[sleep_wake_col].sum() / df.shape[0]) if df_in.shape[0] > 0 else 0
            return sleep_quality
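
# ---------------------------------------------------------------------------
# Worked example of the sleep-efficiency computation above (illustrative):
# efficiency is the percentage of epochs scored as sleep, optionally after
# relabelling wake runs no longer than the tolerance as sleep. The toy series
# and the 1-epoch tolerance are assumptions.
# ---------------------------------------------------------------------------
import pandas as pd

toy = pd.Series([1, 1, 0, 1, 1, 1, 0, 0, 0, 1], name="sleep")
print(100.0 * toy.sum() / toy.shape[0])  # 60.0 (no tolerance)

# With a tolerance of 1 epoch: the isolated wake epoch is relabelled as sleep.
runs = (toy != toy.shift()).cumsum()
run_len = toy.groupby(runs).transform("size")
tolerant = toy.mask((toy == 0) & (run_len <= 1), 1)
print(100.0 * tolerant.sum() / tolerant.shape[0])  # 70.0
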
    def create_day_sleep_experiment_day(self,
                                        sleep_col: str,
                                        new_col: str = 'day_night_sequence',
                                        start_by_awaken_part: bool = True):
        """
        Adds a column to the wearable data.
        This column will be similar to ``experiment_day``; however, instead of having a fixed size, it will follow the day/sleep cycle.
        This is done by exploring the annotations made by the SleepBoudaryDetector module, represented here by the ``sleep_col``.

        :param sleep_col: column resulting from SleepBoudaryDetector.detect_sleep_boundaries()
        :param new_col: the name of the new column created
        :param start_by_awaken_part: if True, sequence id 0 starts with the awake (day) part; if False, it starts with the sleep part
        """

        if not sleep_col:
            raise ValueError("sleep_col arg is None or empty")

        if sleep_col and (sleep_col not in self.data.keys()):
            raise ValueError(
                "Could not find sleep_col named %s for PID %s. Aborting." %
                (sleep_col, self.get_pid()))

        if self.data[sleep_col].dtype != bool:
            raise ValueError(
                "Column sleep_col named %s for PID %s is not of type bool. Aborting."
                % (sleep_col, self.get_pid()))

        seq_length, seq_id = misc.get_consecutive_series(self.data, sleep_col)

        # sleep col is a binary with True meaning Sleep and False meaning Awake
        first_seq_sleep = self.data[sleep_col].iloc[0]

        # We should shift the sequence ids if:
        # (1) the first epoch is sleep, but we should start with the day part, or
        # (2) the first epoch is awake, but we should start with the sleep part.
        if (first_seq_sleep
                and start_by_awaken_part) or (not first_seq_sleep
                                              and not start_by_awaken_part):
            seq_id = seq_id - 1

        # We can use the seq_id to create the new experiment_day col that uses the sequences of awakening and sleep
        self.data[new_col] = seq_id // 2

        # warnings.warn("Switching exp_day_col to %s" % ml_column)
        self.set_experiment_day_col(new_col)
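
# ---------------------------------------------------------------------------
# Small illustration of the day/sleep pairing above (not library code): once
# the awake/sleep runs are numbered 0, 1, 2, ..., integer division by 2 groups
# one run and the following run into the same "day", and shifting the ids by
# -1 flips which part starts a pair. The toy boolean column is assumed.
# ---------------------------------------------------------------------------
import pandas as pd

# Record that starts asleep; we want each "day" to be awake part + sleep part.
sleep = pd.Series([True, True, False, False, True, True, False])
seq_id = (sleep != sleep.shift()).cumsum() - 1        # 0,0,1,1,2,2,3
print((seq_id // 2).tolist())                         # [0, 0, 0, 0, 1, 1, 1]   (sleep-first pairing)
print(((seq_id - 1) // 2).tolist())                   # [-1, -1, 0, 0, 0, 0, 1] (leading sleep becomes day -1)
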
    def get_bouts(self,
                  pa_col: str,
                  length_in_minutes: int,
                  pa_allowance_in_minutes: int,
                  resolution: str,
                  sleep_col: object = None) -> pd.DataFrame:
        """
        Return the bouts for a given physical activity column (``pa_col``).
        One bout is counted when ``pa_col`` is True for more than ``length_in_minutes``.
        We allow up to ``pa_allowance_in_minutes`` minutes of physical activity below the minimum required for the PA level.
        If ``sleep_col`` is used, we do not count bouts when data[sleep_col] is True.
        ``resolution`` can currently be either "day" or "hour".

        :param pa_col:                   The name of the physical activity column in the dataframe.
        :param length_in_minutes:        The minimal length of the activity in minutes
        :param pa_allowance_in_minutes:  The maximum allowance of minutes in which a bout is still counted.
        :param resolution:               Either "day" or "hour". The resolution expected for output.
        :param sleep_col:                If a valid binary column, we ignore bouts that happened when the value of this col is True.
                                         Make sure to run SleepBoudaryDetector.detect_sleep_boundaries() first.
        :return:                         A dataframe counting the number of bouts for the given physical activity level
        """

        if pa_col not in self.names:
            raise ValueError(
                "Unknown physical activity column %s. Please use ``set_cutoffs``."
            )

        returning_df = []
        for wearable in self.wearables:

            if sleep_col and (sleep_col not in wearable.data.keys()):
                raise ValueError(
                    "Could not find sleep_col named %s for PID %s. Aborting." %
                    (sleep_col, wearable.get_pid()))
            df = wearable.data.copy()
            min_num_epochs = wearable.get_epochs_in_min() * length_in_minutes

            df["pa_len"], df["pa_grp"] = misc.get_consecutive_series(
                df, pa_col)
            # Allow up to ``pa_allowance_in_minutes`` minutes of interruption within a bout
            df[pa_col], df["pa_len"], df[
                "pa_grp"] = misc.merge_sequences_given_tolerance(
                    df,
                    "hyp_time_col",
                    pa_col,
                    pa_allowance_in_minutes,
                    seq_id_col="pa_grp",
                    seq_length_col="pa_len")

            # calculate all possible bouts, either including the sleep period or not
            if sleep_col:
                bouts = df[(df[pa_col] == True)
                           & (df["pa_len"] >= min_num_epochs) &
                           (df[sleep_col] == False)]
            else:
                bouts = df[(df[pa_col] == True)
                           & (df["pa_len"] >= min_num_epochs)]

            # drop_duplicates is used to get only the first occurrence of a bout sequence.
            bouts = bouts[[
                "hyp_time_col",
                wearable.get_experiment_day_col(), "pa_grp", "pa_len", pa_col
            ]].drop_duplicates(subset=["pa_grp"])

            if resolution == "day":
                tmp_df = bouts.groupby([wearable.get_experiment_day_col()
                                        ])[pa_col].sum().reset_index()
            elif resolution == "hour":
                gbouts = bouts.set_index("hyp_time_col")
                tmp_df = gbouts.groupby(
                    [wearable.get_experiment_day_col(),
                     gbouts.index.hour])[pa_col].sum().reset_index()
            else:
                raise ValueError(
                    "The parameter 'resolution' can only be `day` or `hour`.")

            tmp_df["pid"] = wearable.get_pid()
            tmp_df["bout_length"] = length_in_minutes

            returning_df.append(tmp_df)

        returning_df = [x for x in returning_df if isinstance(x, pd.DataFrame)]
        return pd.concat(returning_df).reset_index(drop=True)
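
# ---------------------------------------------------------------------------
# Minimal sketch of the bout-counting idea above (illustrative only): after
# short interruptions have been merged (misc.merge_sequences_given_tolerance
# in the real code), a bout is a run of active epochs at least as long as the
# requested duration. The toy data, epoch length and threshold are assumptions.
# ---------------------------------------------------------------------------
import pandas as pd

active = pd.Series([0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1]).astype(bool)
min_epochs = 3  # e.g. a 3-minute bout at 1-minute epochs

runs = (active != active.shift()).cumsum()
run_len = active.groupby(runs).transform("size")

# Count each qualifying run once (the real code uses drop_duplicates on the run id for this).
bouts = active[active & (run_len >= min_epochs)].groupby(runs).size()
print(len(bouts))  # -> 2 bouts (lengths 4 and 3); the 2-epoch run is not counted
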
    def __sleep_boundaries_with_hr(wearable: Wearable, output_col: str, quantile: float = 0.4,
                                   volarity_threshold: int = 5, rolling_win_in_minutes: int = 5,
                                   sleep_search_window: tuple = (20, 12), min_window_length_in_minutes: int = 40,
                                   volatility_window_in_minutes: int = 10, merge_blocks_gap_time_in_min: int = 240,
                                   sleep_only_in_sleep_search_window: bool = False,
                                   only_largest_sleep_period: bool = False):

        if wearable.hr_col is None:
            raise AttributeError("HR is not available for PID %s." % (wearable.get_pid()))

        rolling_win_in_minutes = int(rolling_win_in_minutes * wearable.get_epochs_in_min())
        min_window_length_in_minutes = int(min_window_length_in_minutes * wearable.get_epochs_in_min())
        volatility_window_in_minutes = int(volatility_window_in_minutes * wearable.get_epochs_in_min())

        df = wearable.data.copy()

        df["hyp_sleep"], df["hyp_sleep_bin"], df["hyp_seq_length"], df[
            "hyp_seq_id"] = SleepBoudaryDetector.__create_threshold_col_based_on_time(wearable.data, wearable.time_col,
                                                                                      wearable.hr_col,
                                                                                      sleep_search_window[0],
                                                                                      sleep_search_window[1],
                                                                                      quantile,
                                                                                      rolling_win_in_minutes,
                                                                                      sleep_only_in_sleep_search_window)

        df['hyp_sleep_candidate'] = ((df["hyp_sleep_bin"] == 1.0) & (
                df['hyp_seq_length'] > min_window_length_in_minutes)).astype(int)

        df["hyp_sleep_vard"] = df[wearable.hr_col].rolling(volatility_window_in_minutes,
                                                           center=True).std().fillna(0)

        df["hyp_seq_length"], df["hyp_seq_id"] = misc.get_consecutive_series(df, "hyp_sleep_candidate")

        # Merge two sleep segments if their gap is smaller than X min (interval per day):
        wearable.data = df
        saved_hour_start_day = wearable.hour_start_experiment
        wearable.change_start_hour_for_experiment_day(sleep_search_window[0])
        grps = wearable.data.groupby(wearable.experiment_day_col)
        tmp_df = []
        for grp_id, grp_df in grps:
            gdf = grp_df.copy()
            gdf["hyp_sleep_candidate"], gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.merge_sequences_given_tolerance(
                gdf, wearable.time_col, "hyp_sleep_candidate", tolerance_in_minutes=merge_blocks_gap_time_in_min)

            tmp_df.append(gdf)
        wearable.data = pd.concat(tmp_df)
        wearable.change_start_hour_for_experiment_day(saved_hour_start_day)

        df = wearable.data.set_index(wearable.time_col)
        new_sleep_segments = df[df["hyp_sleep_candidate"] == 1]["hyp_seq_id"].unique()

        # Check if we can modify the sleep onset/offset
        for sleep_seg_id in new_sleep_segments:
            actual_seg = df[df["hyp_seq_id"] == sleep_seg_id]

            if actual_seg.shape[0] == 0:
                continue

            start_time = actual_seg.index[0]
            end_time = actual_seg.index[-1]

            look_sleep_onset = df[start_time - timedelta(hours=4): start_time + timedelta(minutes=60)]
            look_sleep_offset = df[end_time - timedelta(minutes=1): end_time + timedelta(minutes=120)]

            new_sleep_onset = look_sleep_onset[look_sleep_onset["hyp_sleep_vard"] > volarity_threshold]
            new_sleep_offset = look_sleep_offset[look_sleep_offset["hyp_sleep_vard"] > volarity_threshold]

            new_start = new_sleep_onset.index[-1] if not new_sleep_onset.empty else start_time
            new_end = new_sleep_offset.index[0] if not new_sleep_offset.empty else end_time

            df.loc[new_start:new_end, "hyp_seq_id"] = sleep_seg_id
            # df.loc[new_start:new_end, "hyp_seq_length"] = df.loc[new_start:new_end].shape[0]
            df.loc[new_start:new_end, "hyp_sleep_candidate"] = 1

        # Need to reorganize the sequences.
        df["hyp_seq_length"], df["hyp_seq_id"] = misc.get_consecutive_series(df, "hyp_sleep_candidate")

        # new_sleep_segments = df[df[col_win_night + '_sleep_candidate'] == 1][col_win_night + '_grpid'].unique()
        wearable.data = df.reset_index()

        if only_largest_sleep_period:  # If true, we keep only one sleep period per night.

            saved_hour_start_day = wearable.hour_start_experiment
            wearable.change_start_hour_for_experiment_day(sleep_search_window[0])

            grps = wearable.data.groupby(wearable.experiment_day_col)
            tmp_df = []
            for grp_id, grp_df in grps:
                gdf = grp_df.copy()
                gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.get_consecutive_series(gdf, "hyp_sleep_candidate")
                df_out = misc.find_largest_sequence(gdf, "hyp_sleep_candidate", output_col).replace(-1, False)
                tmp_df.append(df_out)
            wearable.data[output_col] = pd.concat(tmp_df)
            wearable.change_start_hour_for_experiment_day(saved_hour_start_day)
        else:
            # Save final output
            wearable.data[output_col] = False
            wearable.data.loc[wearable.data["hyp_sleep_candidate"] == 1, output_col] = True

        # Clean up!
        wearable.data.drop(
            columns=["hyp_sleep", "hyp_sleep_candidate", "hyp_seq_id",
                     "hyp_sleep_bin",
                     "hyp_sleep_vard", "hyp_seq_length"], inplace=True)
    def __sleep_boundaries_with_angle_change_algorithm(wearable: Wearable, output_col: str,
                                                       start_hour: int = 15,
                                                       cols: list = [],
                                                       use_triaxial_activity=False,
                                                       q_sleep: float = 0.1,
                                                       minimum_len_in_minutes: int = 30,
                                                       merge_tolerance_in_minutes: int = 180,
                                                       factor: int = 15,
                                                       operator: str = "or",  # Either 'or' or 'and'
                                                       only_largest_sleep_period: bool = False
                                                       ):

        df_time = wearable.data.copy()
        df_time = df_time.set_index(wearable.time_col)

        five_min = int(5 * wearable.get_epochs_in_min())
        minimum_len_in_minutes = int(minimum_len_in_minutes * wearable.get_epochs_in_min())

        if use_triaxial_activity:
            # Step 1:
            df_time["hyp_rolling_x"] = df_time["hyp_act_x"].rolling("5s").median().fillna(0.0)
            df_time["hyp_rolling_y"] = df_time["hyp_act_y"].rolling("5s").median().fillna(0.0)
            df_time["hyp_rolling_z"] = df_time["hyp_act_z"].rolling("5s").median().fillna(0.0)

            df_time["hyp_act_z"].rolling(five_min).median().fillna(0.0)

            df_time["hyp_angle_z"] = (np.arctan(
                df_time["hyp_rolling_z"] / ((df_time['hyp_rolling_y'] ** 2 + df_time['hyp_rolling_x'] ** 2) ** (
                        1 / 2)))) * 180 / np.pi
            # Step 2:
            df_time["hyp_angle_z"] = df_time["hyp_angle_z"].fillna(0.0)
            # Step 3:
            df_time["hyp_angle_z"] = df_time["hyp_angle_z"].rolling("5s").mean().fillna(0.0)

            cols += ["hyp_angle_z"]

        if operator == "or":
            df_time["hyp_sleep_candidate"] = False
        else:
            df_time["hyp_sleep_candidate"] = True

        for col in cols:
            # Paper's Step 4
            df_time["hyp_" + col + '_diff'] = df_time[col].diff().abs()
            # Paper's Step 5
            df_time["hyp_" + col + '_5mm'] = df_time["hyp_" + col + '_diff'].rolling(five_min).median().fillna(0.0)
            # Paper's Step 6
            quantiles_per_day = df_time["hyp_" + col + '_5mm'].resample('24H', offset="%dh" % start_hour).quantile(
                q_sleep).dropna()
            # print(quantiles_per_day)

            df_time["hyp_" + col + '_10pct'] = quantiles_per_day
            if quantiles_per_day.index[0] < df_time.index[0]:
                df_time.loc[df_time.index[0], "hyp_" + col + '_10pct'] = quantiles_per_day.iloc[0]

            df_time["hyp_" + col + '_10pct'] = df_time["hyp_" + col + '_10pct'].fillna(method='ffill').fillna(
                method='bfill')

            df_time["hyp_" + col + '_bin'] = np.where(
                (df_time["hyp_" + col + '_5mm'] - (df_time["hyp_" + col + '_10pct'] * factor)) > 0, 0, 1)
            df_time["hyp_" + col + '_len'], _ = misc.get_consecutive_series(df_time, "hyp_" + col + '_bin')

            # Paper's Step 7
            if operator == "or":
                df_time["hyp_sleep_candidate"] = df_time["hyp_sleep_candidate"] | (
                            (df_time["hyp_" + col + '_bin'] == 1.0) & (
                            df_time["hyp_" + col + '_len'] > minimum_len_in_minutes))
            else:
                df_time["hyp_sleep_candidate"] = df_time["hyp_sleep_candidate"] & \
                                                 ((df_time["hyp_" + col + '_bin'] == 1.0)
                                                  & (df_time["hyp_" + col + '_len'] > minimum_len_in_minutes))

        # Gets the largest sleep_candidate per night
        wearable.data = df_time.reset_index()
        # wearable.data[output_col] = wearable.data["hyp_sleep_candidate"]

        wearable.data["hyp_seq_length"], wearable.data["hyp_seq_id"] = misc.get_consecutive_series(wearable.data,
                                                                                                   "hyp_sleep_candidate")
        # Paper's Step 8
        wearable.data["hyp_sleep_candidate"], wearable.data["hyp_seq_length"], wearable.data[
            "hyp_seq_id"] = misc.merge_sequences_given_tolerance(wearable.data, wearable.time_col,
                                                                     "hyp_sleep_candidate", merge_tolerance_in_minutes)

        # Paper's Step 9
        if only_largest_sleep_period:  # If true, we keep only one sleep period per night.
            saved_hour_start_day = wearable.hour_start_experiment
            wearable.change_start_hour_for_experiment_day(start_hour)
            grps = wearable.data.groupby(wearable.experiment_day_col)
            tmp_df = []
            for grp_id, grp_df in grps:
                gdf = grp_df.copy()
                gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.get_consecutive_series(gdf, "hyp_sleep_candidate")
                df_out = misc.find_largest_sequence(gdf, "hyp_sleep_candidate", output_col).replace(-1, False)
                tmp_df.append(df_out)
            wearable.data[output_col] = pd.concat(tmp_df)
            wearable.change_start_hour_for_experiment_day(saved_hour_start_day)
        else:
            # Save final output
            wearable.data[output_col] = False
            wearable.data.loc[wearable.data["hyp_sleep_candidate"] == 1, output_col] = True

        # Cleaning up...
        cols_to_drop = ["hyp_sleep_candidate", "hyp_seq_length", "hyp_seq_id"]
        for col in cols:
            cols_to_drop.append("hyp_" + col + '_diff')
            cols_to_drop.append("hyp_" + col + '_5mm')
            cols_to_drop.append("hyp_" + col + '_10pct')
            cols_to_drop.append("hyp_" + col + '_len')

        wearable.data.drop(columns=cols_to_drop, inplace=True)
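
# ---------------------------------------------------------------------------
# Sketch of the arm-angle signal used above (illustrative, not library code):
# from smoothed triaxial acceleration, angle_z = atan(z / sqrt(x^2 + y^2)) in
# degrees, and epochs are candidate sleep when the 5-minute rolling median of
# the absolute angle change stays below a quantile of that signal times a
# factor (the minimum-length check of the real code is omitted here). The
# synthetic data and the q_sleep=0.1 / factor=15 values are assumptions taken
# from the defaults above.
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd

rng = np.random.default_rng(7)
idx = pd.date_range("2021-01-01 15:00", periods=12 * 60, freq="min")
x, y, z = (pd.Series(rng.normal(0, 0.1, len(idx)), index=idx) for _ in range(3))
z = z + np.r_[np.zeros(6 * 60), np.ones(6 * 60)]        # arm roughly raised and still in the second half

angle_z = np.degrees(np.arctan(z / np.sqrt(x ** 2 + y ** 2)))
change_5mm = angle_z.diff().abs().rolling(5).median().fillna(0.0)

threshold = change_5mm.quantile(0.1) * 15               # single-day quantile; the library uses one per day
sleep_candidate = change_5mm <= threshold
print(sleep_candidate.mean())                           # fraction of epochs flagged as candidate sleep
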