def test_stats_activities(self):
    """Check the result sizes of the activity statistics for every configured activity set.

    Iterates in lockstep over ``self.len_activities``, ``self.num_activities``,
    ``self.df_activity_attrs`` and ``self.lst_activity_attrs``. For each entry the
    activity dataframe and activity list are looked up on ``self.data`` by attribute
    name, and every statistic is computed twice: once with the explicit activity
    list (result must match the list length) and once without it (result must match
    the expected number of activities).
    """
    from pyadlml.dataset.stats.activities import (
        activities_count,
        activities_dist,
        activities_transitions,
        activity_durations,
    )

    dist_len = 100
    fixtures = zip(self.len_activities, self.num_activities,
                   self.df_activity_attrs, self.lst_activity_attrs)

    # The first zipped value is not asserted on, but it still takes part in the
    # zip and therefore bounds the number of iterations.
    for _len_acts, expected_num, df_attr, lst_attr in fixtures:
        acts_df = getattr(self.data, df_attr)
        acts_lst = getattr(self.data, lst_attr)
        lst_size = len(acts_lst)

        # counts
        counts = activities_count(acts_df, acts_lst)
        assert len(counts) == lst_size
        counts = activities_count(acts_df)
        assert len(counts) == expected_num

        # transitions
        transitions = activities_transitions(acts_df, acts_lst)
        assert len(transitions) == lst_size
        assert transitions.values.sum() == len(acts_df) - 1
        transitions = activities_transitions(acts_df)
        assert len(transitions) == expected_num

        # durations
        durations = activity_durations(acts_df, acts_lst)
        assert len(durations) == lst_size
        durations = activity_durations(acts_df)
        assert len(durations) == expected_num

        # distribution: one column per activity, ``n`` rows
        dist = activities_dist(acts_df, acts_lst, n=dist_len)
        assert len(dist.columns) == lst_size
        assert len(dist) == dist_len
        dist = activities_dist(acts_df, n=dist_len)
        assert len(dist.columns) == expected_num
        assert len(dist) == dist_len
def test_stats_activities(self):
    """Check the result sizes of the activity statistics on ``df_activities_admin``.

    Every statistic is computed twice on ``self.data.df_activities_admin``: once
    with ``self.data.lst_activities`` passed explicitly (result must match the
    list length) and once without it (result must contain exactly 2 activities).
    """
    from pyadlml.dataset.stats.activities import (
        activities_count,
        activities_dist,
        activities_transitions,
        activity_durations,
    )

    acts_df = self.data.df_activities_admin
    acts_lst = self.data.lst_activities
    expected_num = 2
    dist_len = 100

    # counts
    counts = activities_count(acts_df, acts_lst)
    assert len(counts) == len(acts_lst)
    counts = activities_count(acts_df)
    assert len(counts) == expected_num

    # transitions
    transitions = activities_transitions(acts_df, acts_lst)
    assert len(transitions) == len(acts_lst)
    assert transitions.values.sum() == len(acts_df) - 1
    transitions = activities_transitions(acts_df)
    assert len(transitions) == expected_num

    # durations
    durations = activity_durations(acts_df, acts_lst)
    assert len(durations) == len(acts_lst)
    durations = activity_durations(acts_df)
    assert len(durations) == expected_num

    # distribution: one column per activity, ``n`` rows
    dist = activities_dist(acts_df, acts_lst, n=dist_len)
    assert len(dist.columns) == len(acts_lst)
    assert len(dist) == dist_len
    dist = activities_dist(acts_df, n=dist_len)
    assert len(dist.columns) == expected_num
    assert len(dist) == dist_len
def hist_cum_duration(df_act, y_scale='both'):
    """Plot the cumulated duration of each activity as a horizontal bar chart.

    Parameters
    ----------
    df_act : pd.DataFrame
        Activity dataframe. A copy of it is handed to ``activity_durations``.
    y_scale : {'norm', 'log', 'both'}, default: 'both'
        'norm' plots the minutes as they are, 'log' plots ``np.log`` of the
        minutes and 'both' draws both traces and adds a dropdown menu that
        toggles between them.

    Returns
    -------
    fig : plotly figure
        The bar chart.
    """
    assert y_scale in ['norm', 'log', 'both']

    title = 'Activity cummulative durations'
    col_activity = 'activity'
    col_label = 'minutes'
    col_label_log = 'log minutes'

    act_dur = activity_durations(df_act.copy())
    # reset_index (non-inplace) yields a fresh frame, so the column assignments
    # below never write into a slice of ``act_dur``.
    df = act_dur[[col_label]].reset_index(level=0)

    if y_scale in ['norm', 'log']:
        if y_scale == 'log':
            df[col_label] = np.log(df[col_label])
            labels = {col_label: col_label_log}
        else:
            labels = {col_label: col_label}

        df = df.sort_values(by=[col_label], axis=0)
        return px.bar(
            df,
            y=col_activity,
            x=col_label,
            title=title,
            labels=labels,
            height=400,
            # TODO add the fraction by hovering (hover_data=['fraction'])
        )

    df = df.sort_values(by=[col_label], axis=0)
    df[col_label_log] = np.log(df[col_label])

    fig = go.Figure()
    fig.add_trace(go.Bar(x=df[col_label], y=df[col_activity],
                         orientation='h', name='duration'))
    fig.add_trace(go.Bar(x=df[col_label_log], y=df[col_activity],
                         orientation='h', name='log duration',
                         visible=False))

    def _scale_button(label, visible, axis_title):
        # Button args are forwarded to plotly.js unchanged, hence the dotted
        # attribute path instead of the python-side ``xaxis_title`` shorthand.
        return dict(label=label,
                    method="update",
                    args=[{"visible": visible},
                          {"title": title,
                           "xaxis.title.text": axis_title}])

    # Add dropdown; the x-axis title follows the trace that is currently shown.
    fig.update_layout(
        title=title,
        xaxis_title=col_label,
        yaxis_title='activities',
        updatemenus=[
            dict(buttons=[
                     _scale_button("no scaling", [True, False], col_label),
                     _scale_button("log scaled", [False, True], col_label_log),
                 ],
                 active=0,
                 direction="up",
                 showactive=True,
                 x=1.0,
                 xanchor="right",
                 y=-0.2,
                 yanchor="bottom"),
        ])
    return fig
def hist_cum_duration(df_acts=None, lst_acts=None, df_dur=None, y_scale=None, idle=False,
                      figsize=None, color=None, file_path=None):
    """
    Plots the cumulative duration for each activity in a bar plot.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`. Has to be given if *df_dur* is not.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can be a
        subset of the recorded activities or contain activities that are not recorded.
    df_dur : pd.DataFrame, optional
        Already computed durations with the columns ``activity`` and ``seconds``.
        If given, it is plotted directly and *df_acts*, *lst_acts* and *idle*
        are not used.
    y_scale : {None, "linear", "log"}, default: None
        The axis scale type to apply to the duration axis. *None* and "linear"
        both leave the axis linear.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        automatically from the number of bars.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_bar_duration
    >>> plot_activity_bar_duration(data.df_activities)

    .. image:: ../_static/images/plots/act_bar_dur.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    # 'linear' is accepted because the documented options include it.
    assert y_scale in [None, 'linear', 'log']
    assert not (df_acts is None and df_dur is None)

    title = 'Cummulative activity durations'
    xlabel = 'seconds'
    freq = 'seconds'
    color = get_primary_color() if color is None else color

    if df_dur is None:
        if idle:
            # work on a copy so the caller's dataframe is left untouched
            df_acts = add_idle(df_acts.copy())
        df = activity_durations(df_acts, lst_acts=lst_acts, time_unit=freq)
    else:
        df = df_dur
    df = df.sort_values(by=[freq], axis=0)

    if figsize is None:
        figsize = _num_bars_2_figsize(len(df))

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.barh(df['activity'], df[freq], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # create secondary axis with time format 1s, 1m, 1d; the identity transforms
    # keep it aligned with the bottom axis, only the tick labels differ
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.set_xlabel('time')
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    if file_path is not None:
        savefig(fig, file_path)
        return None
    return fig