Example #1
0
    def test_stats_activities(self):
        """Check the result sizes of the activity statistics for every configured activity frame.

        Each statistic is computed twice: once with an explicit activity list
        (result size must equal the list length) and once without
        (result size must equal the expected number of activities).
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        n_samples = 100
        fixtures = zip(self.len_activities, self.num_activities,
                       self.df_activity_attrs, self.lst_activity_attrs)

        # The first tuple entry is unpacked but not used by any check below.
        for _len_acts, expected_num, df_name, lst_name in fixtures:
            frame = getattr(self.data, df_name)
            activities = getattr(self.data, lst_name)

            for stat in (activities_count, activities_transitions, activity_durations):
                with_list = stat(frame, activities)
                assert len(with_list) == len(activities)
                if stat is activities_transitions:
                    # every consecutive pair of rows contributes exactly one transition
                    assert with_list.values.sum() == len(frame) - 1
                without_list = stat(frame)
                assert len(without_list) == expected_num

            dist = activities_dist(frame, activities, n=n_samples)
            assert len(dist.columns) == len(activities)
            assert len(dist) == n_samples
            dist = activities_dist(frame, n=n_samples)
            assert len(dist.columns) == expected_num
            assert len(dist) == n_samples
Example #2
0
    def test_stats_activities(self):
        """Check the result sizes of the activity statistics on the admin activity frame.

        Each statistic is computed with and without the explicit activity
        list; the expected size is the list length in the first case and
        two in the second.
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        frame = self.data.df_activities_admin
        activities = self.data.lst_activities
        expected_num = 2  # expected result size when no activity list is passed
        n_samples = 100

        for stat in (activities_count, activities_transitions, activity_durations):
            restricted = stat(frame, activities)
            assert len(restricted) == len(activities)
            if stat is activities_transitions:
                # every consecutive pair of rows contributes exactly one transition
                assert restricted.values.sum() == len(frame) - 1
            unrestricted = stat(frame)
            assert len(unrestricted) == expected_num

        dist = activities_dist(frame, activities, n=n_samples)
        assert len(dist.columns) == len(activities)
        assert len(dist) == n_samples
        dist = activities_dist(frame, n=n_samples)
        assert len(dist.columns) == expected_num
        assert len(dist) == n_samples
Example #3
0
def hist_cum_duration(df_act, y_scale='both'):
    """
    Plot the cumulated duration of each activity as a bar chart.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities. A copy is handed to ``activity_durations``, so
        the passed frame itself is not handed on.
    y_scale : {'norm', 'log', 'both'}, default: 'both'
        'norm' plots the durations in minutes, 'log' plots their natural
        logarithm and 'both' builds a figure holding both traces together
        with a dropdown menu to switch between them.

    Returns
    -------
    fig : plotly figure
        The bar chart.
    """
    assert y_scale in ['norm', 'log', 'both']

    title = 'Activity cummulative durations'
    col_activity = 'activity'
    col_label = 'minutes'
    col_label_log = 'log minutes'

    act_dur = activity_durations(df_act.copy())
    # reset_index (not inplace) returns a new frame, so the column assignments
    # below never write into a slice of act_dur.
    # NOTE(review): the index level is presumably named 'activity', since that
    # column is read below -- confirm against activity_durations.
    df = act_dur[[col_label]].reset_index(level=0)

    if y_scale in ['norm', 'log']:
        if y_scale == 'log':
            df[col_label] = np.log(df[col_label])
            labels = {col_label: col_label_log}
        else:
            labels = {col_label: col_label}

        df = df.sort_values(by=[col_label], axis=0)
        fig = px.bar(
            df,
            y=col_activity,
            x=col_label,
            title=title,
            labels=labels,
            height=400,
            #hover_data=['fraction'] TODO add the fraction by hovering
        )
    else:
        df = df.sort_values(by=[col_label], axis=0)
        df[col_label_log] = np.log(df[col_label])

        fig = go.Figure()
        fig.add_trace(go.Bar(x=df[col_label], y=df[col_activity],
                             orientation='h', name='duration'))
        # the log trace starts hidden; the dropdown toggles between both traces
        fig.add_trace(go.Bar(x=df[col_label_log], y=df[col_activity],
                             orientation='h', name='log duration',
                             visible=False))

        def _scale_button(label, visible, axis_title):
            # Button args are forwarded to Plotly.js unchanged, so the axis
            # title has to be addressed by its dotted attribute path; the
            # Python-side "xaxis_title" shorthand is not resolved there.
            return dict(label=label,
                        method="update",
                        args=[{"visible": visible},
                              {"title": title,
                               "xaxis.title.text": axis_title}])

        # Add dropdown
        fig.update_layout(title=title,
                          xaxis_title=col_label,
                          yaxis_title='activities',
                          updatemenus=[
                              dict(buttons=[
                                  _scale_button("no scaling", [True, False],
                                                col_label),
                                  _scale_button("log scaled", [False, True],
                                                col_label_log),
                              ],
                                   active=0,
                                   direction="up",
                                   showactive=True,
                                   x=1.0,
                                   xanchor="right",
                                   y=-0.2,
                                   yanchor="bottom"),
                          ])

    return fig
Example #4
0
def hist_cum_duration(df_acts=None,
                      lst_acts=None,
                      df_dur=None,
                      y_scale=None,
                      idle=False,
                      figsize=None,
                      color=None,
                      file_path=None):
    """
    Plots the cumulative duration for each activity in a bar plot.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can be a
        subset of the recorded activities or contain activities that are not recorded.
    df_dur : pd.DataFrame, optional
        Precomputed durations with the columns *activity* and *seconds*. When
        given, it is plotted directly and `df_acts`, `lst_acts` and `idle`
        are not used. One of `df_acts` or `df_dur` has to be provided.
    y_scale : {"log", "linear"}, default: None
        The axis scale type to apply. *None* and "linear" both leave the
        axis linearly scaled.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_bar_duration
    >>> plot_activity_bar_duration(data.df_activities)

    .. image:: ../_static/images/plots/act_bar_dur.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    # 'linear' is accepted because the documented choices include it
    assert y_scale in [None, 'linear', 'log']
    assert not (df_acts is None and df_dur is None)

    title = 'Cummulative activity durations'
    xlabel = 'seconds'
    freq = 'seconds'
    color = (get_primary_color() if color is None else color)

    if df_dur is None:
        if idle:
            # work on a copy so the caller's frame is not handed to add_idle
            df_acts = add_idle(df_acts.copy())
        df = activity_durations(df_acts, lst_acts=lst_acts, time_unit=freq)
    else:
        df = df_dur
    df = df.sort_values(by=[freq], axis=0)

    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # plot; title and label are set on the axes object rather than through
    # pyplot's implicit "current axes" state
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.barh(df['activity'], df[freq], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # create secondary axis with time format 1s, 1m, 1d; the identity
    # transforms keep the top axis on the same values as the bottom one and
    # only the tick formatter differs
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.set_xlabel('time')
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    if file_path is not None:
        savefig(fig, file_path)
        return None
    return fig