def test_stats_activities(self):
    """Check the activity statistics on every configured activity dataframe.

    The fixture attributes ``len_activities``, ``num_activities``,
    ``df_activity_attrs`` and ``lst_activity_attrs`` are iterated in lockstep.
    The latter two hold attribute *names* that are resolved on ``self.data``.

    Each statistic is computed twice: once restricted to the activity list
    (the result must have one entry per listed activity) and once on the
    dataframe alone (the result must have ``num_activities`` entries).
    """
    from pyadlml.dataset.stats.activities import (
        activities_count,
        activities_dist,
        activities_transitions,
        activity_durations,
    )

    fixtures = zip(self.len_activities, self.num_activities,
                   self.df_activity_attrs, self.lst_activity_attrs)

    # The first tuple element is part of the fixture but not checked here.
    for _len_acts, expected_num, df_attr_name, lst_attr_name in fixtures:
        df = getattr(self.data, df_attr_name)
        lst = getattr(self.data, lst_attr_name)

        # --- occurrence counts ---
        counts_listed = activities_count(df, lst)
        assert len(counts_listed) == len(lst)
        counts_all = activities_count(df)
        assert len(counts_all) == expected_num

        # --- transitions ---
        trans_listed = activities_transitions(df, lst)
        assert len(trans_listed) == len(lst)
        # n rows in the dataframe yield n - 1 transitions in total
        assert trans_listed.values.sum() == len(df) - 1
        trans_all = activities_transitions(df)
        assert len(trans_all) == expected_num

        # --- durations ---
        durs_listed = activity_durations(df, lst)
        assert len(durs_listed) == len(lst)
        durs_all = activity_durations(df)
        assert len(durs_all) == expected_num

        # --- distribution: one column per activity, n rows ---
        dist_listed = activities_dist(df, lst, n=100)
        assert len(dist_listed.columns) == len(lst)
        assert len(dist_listed) == 100
        dist_all = activities_dist(df, n=100)
        assert len(dist_all.columns) == expected_num
        assert len(dist_all) == 100
def test_stats_activities(self):
    """Check the activity statistics on ``self.data.df_activities_admin``.

    Each statistic is computed twice: once restricted to
    ``self.data.lst_activities`` (the result must have one entry per listed
    activity) and once on the dataframe alone (the result must have exactly
    two entries).
    """
    from pyadlml.dataset.stats.activities import (
        activities_count,
        activities_dist,
        activities_transitions,
        activity_durations,
    )

    df = self.data.df_activities_admin
    lst = self.data.lst_activities
    # number of activities expected when no activity list is supplied
    expected_num = 2

    # --- occurrence counts ---
    counts_listed = activities_count(df, lst)
    assert len(counts_listed) == len(lst)
    counts_all = activities_count(df)
    assert len(counts_all) == expected_num

    # --- transitions ---
    trans_listed = activities_transitions(df, lst)
    assert len(trans_listed) == len(lst)
    # n rows in the dataframe yield n - 1 transitions in total
    assert trans_listed.values.sum() == len(df) - 1
    trans_all = activities_transitions(df)
    assert len(trans_all) == expected_num

    # --- durations ---
    durs_listed = activity_durations(df, lst)
    assert len(durs_listed) == len(lst)
    durs_all = activity_durations(df)
    assert len(durs_all) == expected_num

    # --- distribution: one column per activity, n rows ---
    dist_listed = activities_dist(df, lst, n=100)
    assert len(dist_listed.columns) == len(lst)
    assert len(dist_listed) == 100
    dist_all = activities_dist(df, n=100)
    assert len(dist_all.columns) == expected_num
    assert len(dist_all) == 100
def hist_counts(df_act, y_scale='norm'):
    """Plot a horizontal bar chart of how often each activity occurs.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities. A copy is handed to ``activities_count``, so
        the argument itself is not touched here.
    y_scale : {'norm', 'log'}, default: 'norm'
        With 'log' the natural logarithm of the counts is plotted instead
        of the raw counts.

    Returns
    -------
    fig
        The figure object returned by ``px.bar``.
    """
    assert y_scale in ['norm', 'log']

    col_label = 'occurence'
    title = 'activity occurrences'

    # NOTE(review): relies on activities_count yielding an 'occurence' column
    # and, once the index is reset, an 'activity' column - confirm.
    counts = activities_count(df_act.copy())
    counts.reset_index(level=0, inplace=True)

    use_log = y_scale == 'log'
    if use_log:
        counts[col_label] = np.log(counts[col_label])
    labels = {col_label: 'log count' if use_log else 'count'}

    # ascending sort, as in the original ordering of the bars
    counts = counts.sort_values(by=['occurence'], axis=0)

    return px.bar(
        counts,
        title=title,
        y='activity',
        x=col_label,
        orientation='h',
        labels=labels,
        height=400,
    )
def hist_counts(df_act, y_scale=None, idle=False, figsize=(9, 3)):
    """Plot a horizontal bar chart of how often each activity occurs.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities. The function works on a copy.
    y_scale : {None, 'log'}, default: None
        With 'log' the count axis is drawn on a logarithmic scale.
    idle : bool, default: False
        If set, the dataframe is passed through ``add_idle`` before the
        activities are counted.
    figsize : (float, float), default: (9, 3)
        Forwarded to ``plt.subplots``.

    Returns
    -------
    fig
        The matplotlib figure holding the bar chart.
    """
    assert y_scale in [None, 'log']

    col_label = 'occurence'

    activities = df_act.copy()
    if idle:
        activities = add_idle(activities)

    # NOTE(review): relies on activities_count yielding an 'occurence' column
    # and, once the index is reset, an 'activity' column - confirm.
    counts = activities_count(activities)
    counts.reset_index(level=0, inplace=True)
    counts = counts.sort_values(by=[col_label], axis=0)

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Activity occurrences')
    plt.xlabel('counts')
    ax.barh(counts['activity'], counts[col_label])
    if y_scale == 'log':
        ax.set_xscale('log')

    return fig
def hist_counts(df_acts=None, lst_acts=None, df_ac=None, y_scale="linear", idle=False,
                figsize=None, color=None, file_path=None):
    """
    Plot a bar chart displaying how often activities are occurring.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can be a
        subset of the recorded activities or contain activities that are not recorded.
    df_ac : pd.DataFrame, optional
        Precomputed activity counts. If given, the counts are plotted as they are
        and ``df_acts``, ``lst_acts`` and ``idle`` are not used. The passed
        dataframe is not modified.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    y_scale : {"log", "linear"}, default: linear
        The axis scale type to apply. *None* is accepted as an alias for
        "linear".
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plot import plot_activity_bar_count
    >>> plot_activity_bar_count(data.df_activities, idle=True);

    .. image:: ../_static/images/plots/act_bar_cnt.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    assert not (df_acts is None and df_ac is None), \
        "either df_acts or df_ac has to be provided"
    # "linear" is the documented default and must pass validation; None is
    # kept for callers that relied on the earlier behaviour.
    assert y_scale in [None, 'linear', 'log'], \
        "y_scale has to be one of 'linear', 'log' or None"

    title = 'Activity occurrences'
    col_label = 'occurrence'
    xlabel = 'counts'
    color = (get_primary_color() if color is None else color)

    # create statistics if they don't exist
    if df_ac is None:
        df_acts = df_acts.copy()
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_count(df_acts, lst_acts=lst_acts)
    else:
        # work on a copy: the in-place index reset below must not leak
        # into the caller's dataframe
        df = df_ac.copy()

    # prepare dataframe for plotting
    df.reset_index(level=0, inplace=True)
    df = df.sort_values(by=[col_label], axis=0)

    # define plot modalities
    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # create plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[col_label], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # save or return fig
    if file_path is not None:
        savefig(fig, file_path)
        return None
    return fig