def __sleep_boundaries_with_annotations(wearable, output_col, annotation_col, hour_to_start_search=18,
                                        merge_tolerance_in_minutes=20, only_largest_sleep_period=True):
    """
    Derives the sleep boundaries from an existing annotation column.

    The annotation column is copied into a temporary candidate column, nearby candidate sequences are
    merged by ``misc.merge_sequences_given_tolerance`` and the result is written to ``output_col``.
    While the function runs, the experiment day of the wearable starts at ``hour_to_start_search``;
    the previous start hour is put back before returning.

    :param wearable: wearable whose ``data`` is annotated in place.
    :param output_col: name of the column that receives the final sleep annotation.
    :param annotation_col: name of an existing column in ``wearable.data`` holding the annotations.
    :param hour_to_start_search: start hour used for the experiment day while searching.
    :param merge_tolerance_in_minutes: tolerance handed to ``misc.merge_sequences_given_tolerance``.
    :param only_largest_sleep_period: if True, only the sequence selected by
        ``misc.find_largest_sequence`` is kept for each experiment day.
    :raises KeyError: if ``annotation_col`` is not a column of ``wearable.data``.
    """
    if annotation_col not in wearable.data.keys():
        raise KeyError("Col %s is not a valid for pid %s" % (annotation_col, wearable.get_pid()))

    original_start_hour = wearable.hour_start_experiment
    wearable.change_start_hour_for_experiment_day(hour_to_start_search)

    wearable.data["hyp_sleep_candidate"] = wearable.data[annotation_col].copy()

    # Label every run of identical candidate values with its length and an id.
    run_length, run_id = misc.get_consecutive_series(wearable.data, "hyp_sleep_candidate")
    wearable.data["hyp_seq_length"] = run_length
    wearable.data["hyp_seq_id"] = run_id

    merged = misc.merge_sequences_given_tolerance(wearable.data, wearable.time_col, "hyp_sleep_candidate",
                                                  merge_tolerance_in_minutes)
    wearable.data["hyp_sleep_candidate"], wearable.data["hyp_seq_length"], wearable.data["hyp_seq_id"] = merged

    if only_largest_sleep_period:
        def _largest_sequence_of(day_df):
            # Sequences are recomputed inside the day so they cannot span two experiment days.
            day = day_df.copy()
            day["hyp_seq_length"], day["hyp_seq_id"] = misc.get_consecutive_series(day, "hyp_sleep_candidate")
            return misc.find_largest_sequence(day, "hyp_sleep_candidate", output_col).replace(-1, False)

        per_day = [_largest_sequence_of(day_df)
                   for _, day_df in wearable.data.groupby(wearable.experiment_day_col)]
        wearable.data[output_col] = pd.concat(per_day)
    else:
        wearable.data[output_col] = False
        candidate_rows = wearable.data[(wearable.data["hyp_sleep_candidate"] == 1)].index
        wearable.data.loc[candidate_rows, output_col] = True

    # Remove the temporary working columns.
    for tmp_col in ("hyp_seq_id", "hyp_seq_length", "hyp_sleep_candidate"):
        del wearable.data[tmp_col]

    wearable.change_start_hour_for_experiment_day(original_start_hour)
def __create_threshold_col_based_on_time(df, time_col: str, hr_col: str, start_time: int, end_time: int,
                                         quantile: float, rolling_win_in_minutes: int,
                                         sleep_only_in_sleep_search_window: bool):
    """
    Builds a per-day heart-rate threshold and a binary "below threshold" series.

    The heart rate outside the search window ``<start_time, end_time>`` is replaced by NaN, then the
    ``quantile`` of the remaining values is computed for every 24h bin starting at ``start_time``.
    That quantile is used as the threshold of all epochs of the bin.

    :param df: input dataframe; it is not modified (the work is done on an indexed copy).
    :param time_col: name of the datetime column used as index.
    :param hr_col: name of the heart rate column.
    :param start_time: hour (0-23) at which the search window starts.
    :param end_time: hour (0-23) at which the search window ends.
    :param quantile: quantile of the heart rate used as threshold.
    :param rolling_win_in_minutes: size of the rolling median window applied to the binary series.
        It is passed directly as the ``window`` of ``rolling``.
    :param sleep_only_in_sleep_search_window: if True, epochs outside the search window are forced to 0.
    :return: four arrays: threshold, binarized series, sequence lengths and sequence ids.
    """
    import inspect  # local import: only needed to pick the right ``between_time`` keywords

    df_time = df.set_index(time_col).copy()

    # This will get the index of everything outside <start_time, end_time>.
    # ``include_start``/``include_end`` were replaced by ``inclusive`` in pandas 1.4 and removed in 2.0,
    # so we use whichever spelling the installed pandas understands.
    outside_from = '%02d:00' % end_time
    outside_to = '%02d:00' % start_time
    if "inclusive" in inspect.signature(df_time.between_time).parameters:
        outside_win = df_time.between_time(outside_from, outside_to, inclusive="neither")
    else:
        outside_win = df_time.between_time(outside_from, outside_to, include_start=False, include_end=False)
    idx = outside_win.index

    # We set the hr_col to nan for the time outside the search win in order to find the quantile below ignoring nans
    df_time.loc[idx, hr_col] = np.nan

    quantiles_per_day = df_time[hr_col].resample('24H', offset="%dh" % start_time).quantile(quantile).dropna()
    df_time["hyp_sleep"] = quantiles_per_day
    # The first 24h bin may start before the first epoch; in that case its value found no row to align with.
    if quantiles_per_day.index[0] < df_time.index[0]:
        df_time.loc[df_time.index[0], "hyp_sleep"] = quantiles_per_day.iloc[0]

    # Propagate each threshold to all the epochs of its bin (equivalent to fillna(method=...)).
    df_time["hyp_sleep"] = df_time["hyp_sleep"].ffill().bfill()

    # binarize_by_hr_threshold: 0 when the heart rate is above the threshold, 1 otherwise.
    # A NaN heart rate is never "above", so it is binarized to 1.
    df_time["hyp_sleep_bin"] = np.where((df_time[hr_col] - df_time["hyp_sleep"]) > 0, 0, 1)
    df_time["hyp_sleep_bin"] = df_time["hyp_sleep_bin"].rolling(window=rolling_win_in_minutes).median().bfill()

    if sleep_only_in_sleep_search_window:
        # Ignore all sleep candidate periods outside the window
        df_time.loc[idx, "hyp_sleep_bin"] = 0

    seq_length, seq_id = misc.get_consecutive_series(df_time, "hyp_sleep_bin")

    return df_time["hyp_sleep"].values, df_time["hyp_sleep_bin"].values, seq_length.values, seq_id.values
def calculate_awakening(df_in: pd.DataFrame, sleep_wake_col: str,
                        ignore_awakenings_smaller_than_X_epochs: int = 0,
                        normalize_per_hour: bool = False, epochs_in_hour: int = 0) -> int:
    """
    This method calculates the number of awakenings (changes from sleep to wake stage).
    It uses the ``sleep_wake_col`` for that, which is the result of any Sleep/Wake algorithm (see SleepWakeAnalysis).
    The parameter ``ignore_awakenings_smaller_than_X_epochs`` is used to avoid small signal fluctuations from
    sleep to wake.

    :param df_in: (partial) dataset to be analysed. It is not modified.
    :param sleep_wake_col: Dataframe column for a Sleep/Wake algorithm. Sleep = 1, Wake = 0.
    :param ignore_awakenings_smaller_than_X_epochs: Wake sequences are only counted if they are longer than
        X epochs.
    :param normalize_per_hour: controls if the result should be normalized per hour or not.
    :param epochs_in_hour: number of epochs in one hour. Must be positive if ``normalize_per_hour`` is True.
    :return: Number of awakenings in ``df_in[sleep_wake_col]``. If ``normalize_per_hour`` is True, the result
        is a float: the number of awakenings divided by the number of hours covered by the analysed epochs
        (0.0 if no epoch is left to analyse).
    :raises ValueError: if ``normalize_per_hour`` is True and ``epochs_in_hour`` is not positive.
    """
    # Fail early with a clear message instead of a ZeroDivisionError at the very end.
    if normalize_per_hour and epochs_in_hour <= 0:
        raise ValueError("epochs_in_hour must be a positive number when normalize_per_hour is True.")

    df = df_in.copy()
    df["consecutive_state"], df["gids"] = misc.get_consecutive_series(df, sleep_wake_col)

    # We ignore the first group of awakenings, as this method is only interested in counting the number
    # of sequences after the subject slept for the first time.
    starts_awake = ((df["gids"] == 0) & (df[sleep_wake_col] == 0)).any()
    if starts_awake:
        df = df[df["gids"] > 0]

    long_enough_wake = (df[sleep_wake_col] == 0) & (df["consecutive_state"] > ignore_awakenings_smaller_than_X_epochs)
    # One group per wake sequence id, i.e. one group per awakening.
    n_awakenings = len(df[long_enough_wake].groupby("gids"))

    if not normalize_per_hour:
        return n_awakenings

    total_hours = df.shape[0] / epochs_in_hour
    if total_hours == 0:
        # Nothing left to analyse (e.g. the subject never slept): no awakenings per hour.
        return 0.0
    return n_awakenings / total_hours
def calculate_sleep_efficiency(df_in, sleep_wake_col: str, ignore_awakenings_smaller_than_X_epochs: int = 0) -> float:
    """
    Computes the sleep efficiency of a dataset, i.e. the percentage of epochs scored as sleep.

    The computation is based on ``sleep_wake_col``, the output of any Sleep/Wake algorithm
    (see SleepWakeAnalysis). Short wake sequences can be treated as sleep through
    ``ignore_awakenings_smaller_than_X_epochs``, which filters out small fluctuations in the signal.

    :param df_in: (partial) dataset to be analysed. It is not modified.
    :param sleep_wake_col: Dataframe column for a Sleep/Wake algorithm. Sleep = 1, Wake = 0.
    :param ignore_awakenings_smaller_than_X_epochs: wake sequences with a length of up to X epochs
        are counted as sleep.
    :return: sleep quality from 0 - 100 (the higher the better sleep quality); 0 for an empty dataset.
    """
    n_epochs = df_in.shape[0]

    if ignore_awakenings_smaller_than_X_epochs == 0:
        if n_epochs > 0:
            return 100. * (df_in[sleep_wake_col].sum() / n_epochs)
        return 0

    # Work on a copy so the caller's sleep/wake column keeps its original values.
    states = df_in[[sleep_wake_col]].copy()
    run_length, _ = misc.get_consecutive_series(states, sleep_wake_col)
    states["consecutive_state"] = run_length

    # Short wake sequences (wake = 0) are converted to sleep (1).
    is_wake = states[sleep_wake_col] == 0
    is_short = states["consecutive_state"] <= ignore_awakenings_smaller_than_X_epochs
    states.loc[is_wake & is_short, sleep_wake_col] = 1

    if n_epochs > 0:
        return 100. * (states[sleep_wake_col].sum() / states.shape[0])
    return 0
def create_day_sleep_experiment_day(self, sleep_col: str, new_col: str = 'day_night_sequence',
                                    start_by_awaken_part: bool = True):
    """
    Creates an experiment day column that follows the day/sleep cycle instead of a fixed size.

    The new column is similar to ``experiment_day``, but each of its values groups one awake period
    with one sleep period. This is done by exploring the annotations made by the SleepBoudaryDetector
    module, represented here by ``sleep_col``. The new column also becomes the experiment day column
    of this object.

    :param sleep_col: sleep_col resulted from SleepBoudaryDetector.detect_sleep_boundary().
        It must be a boolean column: True means sleep, False means awake.
    :param new_col: the name of the new column created
    :param start_by_awaken_part: should we start sequence id 0 with the day part (True) or not (False)
    :raises ValueError: if ``sleep_col`` is empty, is not a column of the data or is not boolean.
    """
    if not sleep_col:
        raise ValueError("sleep_col arg is None or empty")

    if sleep_col not in self.data.keys():
        raise ValueError(
            "Could not find sleep_col named %s for PID %s. Aborting." % (sleep_col, self.get_pid()))

    if self.data[sleep_col].dtype != bool:
        raise ValueError(
            "Column sleep_col named %s for PID %s is not of type bool. Aborting." % (sleep_col, self.get_pid()))

    _, sequence_ids = misc.get_consecutive_series(self.data, sleep_col)

    starts_asleep = self.data[sleep_col].iloc[0]

    # The sequence ids are shifted by one in two situations:
    # (1) the data starts with sleep, but the cycle should start by the day part;
    # (2) the data starts with a day part, but the cycle should start by the sleep part.
    if bool(starts_asleep) == bool(start_by_awaken_part):
        sequence_ids = sequence_ids - 1

    # Two consecutive sequences (one awake, one asleep) share the same cycle number.
    self.data[new_col] = sequence_ids // 2
    self.set_experiment_day_col(new_col)
def get_bouts(self, pa_col: str, length_in_minutes: int, pa_allowance_in_minutes: int, resolution: str,
              sleep_col: object = None) -> pd.DataFrame:
    """
    Return the bouts for a given physical activity column (``pa_col``).
    One bout is counted when ``pa_col`` is True for more than ``length_in_minutes``.
    We allow up to ``pa_allowance_in_minutes`` minutes of physical activity below the minimal required for a pa level.
    If ``sleep_col`` is used, we do not count bouts when data[sleep_col] is True.
    ``resolution`` can currently be either "day" or "hour".

    :param pa_col: The name of the physical activity column in the dataframe.
    :param length_in_minutes: The minimal length of the activity in minutes
    :param pa_allowance_in_minutes: The maximum allowance of minutes in which a bout is still counted.
    :param resolution: Either "day" or "hour". The resolution expected for output.
    :param sleep_col: If a valid binary column, we ignore bouts that happened when the value of this col is True.
                      Make sure to run SleepBoudaryDetector.detect_sleep_boundaries() first.
    :return: A dataframe counting the number of bouts for the given physical activity level.
             It is empty if there are no wearables to analyse.
    :raises ValueError: if ``pa_col`` is unknown, if ``resolution`` is neither "day" nor "hour" or if
                        ``sleep_col`` is given but missing from the data of a wearable.
    """
    if pa_col not in self.names:
        raise ValueError("Unknown physical activity column %s. Please use ``set_cutoffs``." % pa_col)

    # Validate once, before any work is done, so the error does not depend on having wearables.
    if resolution not in ("day", "hour"):
        raise ValueError("The parameter 'resolution' can only be `day` or `hour`.")

    bouts_per_wearable = []
    for wearable in self.wearables:
        if sleep_col and (sleep_col not in wearable.data.keys()):
            raise ValueError(
                "Could not find sleep_col named %s for PID %s. Aborting." % (sleep_col, wearable.get_pid()))

        df = wearable.data.copy()
        exp_day_col = wearable.get_experiment_day_col()
        min_num_epochs = wearable.get_epochs_in_min() * length_in_minutes

        df["pa_len"], df["pa_grp"] = misc.get_consecutive_series(df, pa_col)

        # We admit up to allowance minutes
        df[pa_col], df["pa_len"], df["pa_grp"] = misc.merge_sequences_given_tolerance(
            df, "hyp_time_col", pa_col, pa_allowance_in_minutes, seq_id_col="pa_grp", seq_length_col="pa_len")

        # calculate all possible bouts, either including the sleep period or not.
        # The comparisons with True/False are element-wise on purpose.
        is_bout = (df[pa_col] == True) & (df["pa_len"] >= min_num_epochs)
        if sleep_col:
            is_bout = is_bout & (df[sleep_col] == False)
        bouts = df[is_bout]

        # drop_duplicates is used to get only the first occurrence of a bout sequence.
        bouts = bouts[["hyp_time_col", exp_day_col, "pa_grp", "pa_len", pa_col]].drop_duplicates(subset=["pa_grp"])

        if resolution == "day":
            tmp_df = bouts.groupby([exp_day_col])[pa_col].sum().reset_index()
        else:  # resolution == "hour"
            gbouts = bouts.set_index("hyp_time_col")
            tmp_df = gbouts.groupby([exp_day_col, gbouts.index.hour])[pa_col].sum().reset_index()

        tmp_df["pid"] = wearable.get_pid()
        tmp_df["bout_length"] = length_in_minutes
        bouts_per_wearable.append(tmp_df)

    if not bouts_per_wearable:
        # pd.concat refuses an empty list; no wearables simply means no bouts.
        return pd.DataFrame()

    return pd.concat(bouts_per_wearable).reset_index(drop=True)
def __sleep_boundaries_with_hr(wearable: Wearable, output_col: str, quantile: float = 0.4,
                               volarity_threshold: int = 5, rolling_win_in_minutes: int = 5,
                               sleep_search_window: tuple = (20, 12), min_window_length_in_minutes: int = 40,
                               volatility_window_in_minutes: int = 10, merge_blocks_gap_time_in_min: int = 240,
                               sleep_only_in_sleep_search_window: bool = False,
                               only_largest_sleep_period: bool = False):
    """
    Detects the sleep boundaries of a wearable from its heart rate and writes them to ``output_col``.

    Steps, in order:
    (1) a per-day heart rate threshold and its binarized version are obtained from
    ``__create_threshold_col_based_on_time``;
    (2) epochs below the threshold that belong to a long enough sequence become sleep candidates;
    (3) candidate sequences are merged, experiment day by experiment day;
    (4) the onset/offset of each candidate segment is moved to a nearby epoch of high heart rate volatility;
    (5) the result is saved in ``output_col`` and the temporary columns are dropped.

    ``wearable.data`` is replaced several times along the way and the start hour of the experiment day is
    temporarily changed to ``sleep_search_window[0]``.

    :param wearable: wearable to be annotated. Its ``hr_col`` must be set.
    :param output_col: name of the boolean column that receives the final annotation.
    :param quantile: heart rate quantile forwarded to ``__create_threshold_col_based_on_time``.
    :param volarity_threshold: rolling std of the heart rate above which an epoch can be used as a new
        sleep onset/offset.
    :param rolling_win_in_minutes: rolling window (minutes) forwarded to the threshold helper.
    :param sleep_search_window: (start hour, end hour) of the sleep search window.
    :param min_window_length_in_minutes: a sequence has to be longer than this (minutes) to be a candidate.
    :param volatility_window_in_minutes: window (minutes) of the centered rolling std of the heart rate.
    :param merge_blocks_gap_time_in_min: tolerance (minutes) handed to ``misc.merge_sequences_given_tolerance``.
    :param sleep_only_in_sleep_search_window: forwarded to the threshold helper.
    :param only_largest_sleep_period: if True, only the sequence selected by ``misc.find_largest_sequence``
        is kept for each experiment day.
    :raises AttributeError: if the wearable has no heart rate column.
    """
    if wearable.hr_col is None:
        raise AttributeError("HR is not available for PID %s." % (wearable.get_pid()))

    # From here on, the three window parameters are expressed in epochs rather than in minutes.
    rolling_win_in_minutes = int(rolling_win_in_minutes * wearable.get_epochs_in_min())
    min_window_length_in_minutes = int(min_window_length_in_minutes * wearable.get_epochs_in_min())
    volatility_window_in_minutes = int(volatility_window_in_minutes * wearable.get_epochs_in_min())

    df = wearable.data.copy()

    df["hyp_sleep"], df["hyp_sleep_bin"], df["hyp_seq_length"], df[
        "hyp_seq_id"] = SleepBoudaryDetector.__create_threshold_col_based_on_time(wearable.data, wearable.time_col,
                                                                                  wearable.hr_col,
                                                                                  sleep_search_window[0],
                                                                                  sleep_search_window[1],
                                                                                  quantile,
                                                                                  rolling_win_in_minutes,
                                                                                  sleep_only_in_sleep_search_window)

    # A sleep candidate is an epoch below the threshold that is part of a long enough sequence.
    df['hyp_sleep_candidate'] = ((df["hyp_sleep_bin"] == 1.0) & (
            df['hyp_seq_length'] > min_window_length_in_minutes)).astype(int)

    # Heart rate volatility: centered rolling std (0 where the window is incomplete).
    df["hyp_sleep_vard"] = df[wearable.hr_col].rolling(volatility_window_in_minutes, center=True).std().fillna(0)

    df["hyp_seq_length"], df["hyp_seq_id"] = misc.get_consecutive_series(df, "hyp_sleep_candidate")

    # Merge two sleep segments if their gap is smaller than X min (interval per day):
    wearable.data = df
    # The experiment day temporarily starts at the beginning of the search window; restored after the merge.
    saved_hour_start_day = wearable.hour_start_experiment
    wearable.change_start_hour_for_experiment_day(sleep_search_window[0])
    grps = wearable.data.groupby(wearable.experiment_day_col)
    tmp_df = []
    for grp_id, grp_df in grps:
        gdf = grp_df.copy()
        gdf["hyp_sleep_candidate"], gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.merge_sequences_given_tolerance(
            gdf, wearable.time_col, "hyp_sleep_candidate",
            tolerance_in_minutes=merge_blocks_gap_time_in_min)

        tmp_df.append(gdf)
    wearable.data = pd.concat(tmp_df)
    wearable.change_start_hour_for_experiment_day(saved_hour_start_day)

    # Time-indexed view, needed for the timestamp slices below.
    df = wearable.data.set_index(wearable.time_col)
    new_sleep_segments = df[df["hyp_sleep_candidate"] == 1]["hyp_seq_id"].unique()

    # Check if we can modify the sleep onset/offset
    for sleep_seg_id in new_sleep_segments:
        actual_seg = df[df["hyp_seq_id"] == sleep_seg_id]

        # The ids were collected before the loop; rows of a segment may have been relabelled meanwhile.
        if actual_seg.shape[0] == 0:
            continue

        start_time = actual_seg.index[0]
        end_time = actual_seg.index[-1]

        # Onset is searched from 4h before to 60 min after the current start;
        # offset from 1 min before to 120 min after the current end.
        look_sleep_onset = df[start_time - timedelta(hours=4): start_time + timedelta(minutes=60)]
        look_sleep_offset = df[end_time - timedelta(minutes=1): end_time + timedelta(minutes=120)]

        new_sleep_onset = look_sleep_onset[look_sleep_onset["hyp_sleep_vard"] > volarity_threshold]
        new_sleep_offset = look_sleep_offset[look_sleep_offset["hyp_sleep_vard"] > volarity_threshold]

        # New onset: last volatile epoch of the onset search; new offset: first volatile epoch of the
        # offset search. Without a volatile epoch the current boundary is kept.
        new_start = new_sleep_onset.index[-1] if not new_sleep_onset.empty else start_time
        new_end = new_sleep_offset.index[0] if not new_sleep_offset.empty else end_time

        # All the rows from new_start to new_end now belong to this segment.
        df.loc[new_start:new_end, "hyp_seq_id"] = sleep_seg_id
        # hyp_seq_length is not updated here; the sequences are recomputed after the loop.
        df.loc[new_start:new_end, "hyp_sleep_candidate"] = 1

    # Need to reorganize the sequences.
    df["hyp_seq_length"], df["hyp_seq_id"] = misc.get_consecutive_series(df, "hyp_sleep_candidate")

    wearable.data = df.reset_index()

    if only_largest_sleep_period:  # If true, we keep only one sleep period per night.

        saved_hour_start_day = wearable.hour_start_experiment
        wearable.change_start_hour_for_experiment_day(sleep_search_window[0])

        grps = wearable.data.groupby(wearable.experiment_day_col)
        tmp_df = []
        for grp_id, grp_df in grps:
            gdf = grp_df.copy()
            # Sequences are recomputed inside the group, i.e. inside one experiment day.
            gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.get_consecutive_series(gdf, "hyp_sleep_candidate")
            df_out = misc.find_largest_sequence(gdf, "hyp_sleep_candidate", output_col).replace(-1, False)
            tmp_df.append(df_out)
        wearable.data[output_col] = pd.concat(tmp_df)
        wearable.change_start_hour_for_experiment_day(saved_hour_start_day)
    else:
        # Save final output
        wearable.data[output_col] = False
        wearable.data.loc[wearable.data[(wearable.data["hyp_sleep_candidate"] == 1)].index, output_col] = True

    # Clean up!
    wearable.data.drop(
        columns=["hyp_sleep", "hyp_sleep_candidate", "hyp_seq_id", "hyp_sleep_bin", "hyp_sleep_vard",
                 "hyp_seq_length"], inplace=True)
def __sleep_boundaries_with_angle_change_algorithm(wearable: Wearable, output_col: str, start_hour: int = 15,
                                                   cols: list = None, use_triaxial_activity=False,
                                                   q_sleep: float = 0.1, minimum_len_in_minutes: int = 30,
                                                   merge_tolerance_in_minutes: int = 180, factor: int = 15,
                                                   operator: str = "or",  # Either 'or' or 'and'
                                                   only_largest_sleep_period: bool = False
                                                   ):
    """
    Detects the sleep boundaries of a wearable from the changes of one or more signals (e.g. arm angle)
    and writes them to ``output_col``.

    For each column in ``cols``, the absolute difference between consecutive epochs is smoothed with a
    5-minute rolling median and compared with ``factor`` times its daily ``q_sleep`` quantile. Epochs
    below that threshold that belong to a long enough sequence are sleep candidates. The candidates of
    the different columns are combined according to ``operator``, merged with
    ``misc.merge_sequences_given_tolerance`` and saved in ``output_col``.
    The "Paper's Step" comments use the step numbers of the paper followed by the original authors.

    ``wearable.data`` is replaced by the annotated dataframe.

    :param wearable: wearable to be annotated.
    :param output_col: name of the column that receives the final annotation.
    :param start_hour: hour at which each 24h block starts (used for the daily quantiles and, if
        ``only_largest_sleep_period`` is True, for the experiment day).
    :param cols: columns to be analysed. The list is copied, never modified. Defaults to no columns.
    :param use_triaxial_activity: if True, the z angle is derived from ``hyp_act_x``, ``hyp_act_y`` and
        ``hyp_act_z`` and analysed together with ``cols``.
    :param q_sleep: quantile of the smoothed differences used as daily reference.
    :param minimum_len_in_minutes: a sequence has to be longer than this (minutes) to be a candidate.
    :param merge_tolerance_in_minutes: tolerance handed to ``misc.merge_sequences_given_tolerance``.
    :param factor: multiplier of the daily quantile that gives the threshold.
    :param operator: "or" to accept candidates of any column; any other value requires all columns to agree.
    :param only_largest_sleep_period: if True, only the sequence selected by ``misc.find_largest_sequence``
        is kept for each experiment day.
    """
    # Work on a private copy: the original default ``[]`` was shared between calls and ``cols += ...``
    # also modified the list of the caller.
    cols = list(cols) if cols is not None else []

    df_time = wearable.data.copy()
    df_time = df_time.set_index(wearable.time_col)

    five_min = int(5 * wearable.get_epochs_in_min())
    minimum_len_in_minutes = int(minimum_len_in_minutes * wearable.get_epochs_in_min())

    if use_triaxial_activity:
        # Step 1: 5-second rolling median of each axis, then the angle of the z axis in degrees.
        df_time["hyp_rolling_x"] = df_time["hyp_act_x"].rolling("5s").median().fillna(0.0)
        df_time["hyp_rolling_y"] = df_time["hyp_act_y"].rolling("5s").median().fillna(0.0)
        df_time["hyp_rolling_z"] = df_time["hyp_act_z"].rolling("5s").median().fillna(0.0)

        df_time["hyp_angle_z"] = (np.arctan(
            df_time["hyp_rolling_z"] / ((df_time['hyp_rolling_y'] ** 2 + df_time['hyp_rolling_x'] ** 2) ** (
                    1 / 2)))) * 180 / np.pi
        # Step 2:
        df_time["hyp_angle_z"] = df_time["hyp_angle_z"].fillna(0.0)
        # Step 3:
        df_time["hyp_angle_z"] = df_time["hyp_angle_z"].rolling("5s").mean().fillna(0.0)

        cols.append("hyp_angle_z")

    # Neutral element of the chosen combination: False for "or", True for "and".
    df_time["hyp_sleep_candidate"] = (operator != "or")

    for col in cols:
        diff_col = "hyp_" + col + '_diff'
        smooth_col = "hyp_" + col + '_5mm'
        quantile_col = "hyp_" + col + '_10pct'
        bin_col = "hyp_" + col + '_bin'
        len_col = "hyp_" + col + '_len'

        # Paper's Step 4
        df_time[diff_col] = df_time[col].diff().abs()
        # Paper's Step 5
        df_time[smooth_col] = df_time[diff_col].rolling(five_min).median().fillna(0.0)
        # Paper's Step 6
        quantiles_per_day = df_time[smooth_col].resample('24H', offset="%dh" % start_hour).quantile(
            q_sleep).dropna()

        df_time[quantile_col] = quantiles_per_day
        # The first 24h bin may start before the first epoch; in that case its value found no row to align with.
        if quantiles_per_day.index[0] < df_time.index[0]:
            df_time.loc[df_time.index[0], quantile_col] = quantiles_per_day.iloc[0]

        # Propagate each daily quantile to all the epochs of its bin (equivalent to fillna(method=...)).
        df_time[quantile_col] = df_time[quantile_col].ffill().bfill()

        # 1 when the smoothed change is not above ``factor`` times the daily quantile, 0 otherwise.
        df_time[bin_col] = np.where((df_time[smooth_col] - (df_time[quantile_col] * factor)) > 0, 0, 1)
        df_time[len_col], _ = misc.get_consecutive_series(df_time, bin_col)

        # Paper's Step 7
        col_candidate = (df_time[bin_col] == 1.0) & (df_time[len_col] > minimum_len_in_minutes)
        if operator == "or":
            df_time["hyp_sleep_candidate"] = df_time["hyp_sleep_candidate"] | col_candidate
        else:
            df_time["hyp_sleep_candidate"] = df_time["hyp_sleep_candidate"] & col_candidate

    # Gets the largest sleep_candidate per night
    wearable.data = df_time.reset_index()
    wearable.data["hyp_seq_length"], wearable.data["hyp_seq_id"] = misc.get_consecutive_series(
        wearable.data, "hyp_sleep_candidate")

    # Paper's Step 8
    wearable.data["hyp_sleep_candidate"], wearable.data["hyp_seq_length"], wearable.data[
        "hyp_seq_id"] = misc.merge_sequences_given_tolerance(wearable.data, wearable.time_col,
                                                             "hyp_sleep_candidate", merge_tolerance_in_minutes)

    # Paper's Step 9
    if only_largest_sleep_period:  # If true, we keep only one sleep period per night.
        saved_hour_start_day = wearable.hour_start_experiment
        wearable.change_start_hour_for_experiment_day(start_hour)
        grps = wearable.data.groupby(wearable.experiment_day_col)
        tmp_df = []
        for grp_id, grp_df in grps:
            gdf = grp_df.copy()
            gdf["hyp_seq_length"], gdf["hyp_seq_id"] = misc.get_consecutive_series(gdf, "hyp_sleep_candidate")
            df_out = misc.find_largest_sequence(gdf, "hyp_sleep_candidate", output_col).replace(-1, False)
            tmp_df.append(df_out)
        wearable.data[output_col] = pd.concat(tmp_df)
        wearable.change_start_hour_for_experiment_day(saved_hour_start_day)
    else:
        # Save final output
        wearable.data[output_col] = False
        wearable.data.loc[wearable.data[(wearable.data["hyp_sleep_candidate"] == 1)].index, output_col] = True

    # Cleaning up...
    # NOTE(review): the "hyp_<col>_bin" columns (and the "hyp_rolling_*"/"hyp_angle_z" columns of the
    # triaxial path) are not dropped, as in the original code - confirm whether callers rely on them.
    cols_to_drop = ["hyp_sleep_candidate", "hyp_seq_length", "hyp_seq_id"]
    for col in cols:
        cols_to_drop.append("hyp_" + col + '_diff')
        cols_to_drop.append("hyp_" + col + '_5mm')
        cols_to_drop.append("hyp_" + col + '_10pct')
        cols_to_drop.append("hyp_" + col + '_len')

    wearable.data.drop(columns=cols_to_drop, inplace=True)