Example #1
0
    def test_stats_activities(self):
        """Check the activity statistics for every configured activity frame.

        For each (dataframe attribute, activity-list attribute) pair found on
        the test case, the count, transition, duration and distribution
        statistics are computed twice: once restricted to the activity list
        and once from the dataframe alone. The sizes of the results are then
        compared against the list length and the expected number of
        activities, respectively.
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        cases = zip(self.len_activities, self.num_activities,
                    self.df_activity_attrs, self.lst_activity_attrs)

        for _len_acts, expected_num, df_attr, lst_attr in cases:
            frame = getattr(self.data, df_attr)
            activities = getattr(self.data, lst_attr)

            # occurrence counts
            counts = activities_count(frame, activities)
            assert len(counts) == len(activities)
            counts = activities_count(frame)
            assert len(counts) == expected_num

            # transition matrix: one transition between each consecutive pair of rows
            transitions = activities_transitions(frame, activities)
            assert len(transitions) == len(activities)
            assert transitions.values.sum() == len(frame) - 1
            transitions = activities_transitions(frame)
            assert len(transitions) == expected_num

            # durations
            durations = activity_durations(frame, activities)
            assert len(durations) == len(activities)
            durations = activity_durations(frame)
            assert len(durations) == expected_num

            # distribution: one column per activity, n rows
            dist = activities_dist(frame, activities, n=100)
            assert len(dist.columns) == len(activities)
            assert len(dist) == 100
            dist = activities_dist(frame, n=100)
            assert len(dist.columns) == expected_num
            assert len(dist) == 100
Example #2
0
    def test_stats_activities(self):
        """Check the activity statistics of the admin activity dataframe.

        Every statistic is computed once restricted to ``lst_activities`` and
        once from the dataframe alone; in the latter case exactly two
        activities are expected.
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        frame = self.data.df_activities_admin
        activities = self.data.lst_activities

        # occurrence counts
        counts = activities_count(frame, activities)
        assert len(counts) == len(activities)
        counts = activities_count(frame)
        assert len(counts) == 2

        # transition matrix: one transition between each consecutive pair of rows
        transitions = activities_transitions(frame, activities)
        assert len(transitions) == len(activities)
        assert transitions.values.sum() == len(frame) - 1
        transitions = activities_transitions(frame)
        assert len(transitions) == 2

        # durations
        durations = activity_durations(frame, activities)
        assert len(durations) == len(activities)
        durations = activity_durations(frame)
        assert len(durations) == 2

        # distribution: one column per activity, n rows
        dist = activities_dist(frame, activities, n=100)
        assert len(dist.columns) == len(activities)
        assert len(dist) == 100
        dist = activities_dist(frame, n=100)
        assert len(dist.columns) == 2
        assert len(dist) == 100
Example #3
0
def hist_counts(df_act, y_scale='norm'):
    """
    Build a horizontal bar chart of how often each activity occurs.

    Parameters
    ----------
    df_act : dataframe
        Activity data; a copy of it is handed to ``activities_count``.
    y_scale : {'norm', 'log'}, default: 'norm'
        With 'log' the natural logarithm of the counts is plotted and the
        axis is labelled 'log count'; otherwise the raw counts are plotted.

    Returns
    -------
    fig
        The object returned by ``px.bar``.
    """
    assert y_scale in ['norm', 'log']

    count_col = 'occurence'

    counts = activities_count(df_act.copy())
    counts.reset_index(level=0, inplace=True)

    use_log = y_scale == 'log'
    if use_log:
        counts[count_col] = np.log(counts[count_col])
    axis_labels = {count_col: 'log count' if use_log else 'count'}

    # ascending sort of the (possibly log-transformed) counts
    counts = counts.sort_values(by=[count_col], axis=0)
    return px.bar(
        counts,
        title='activity occurrences',
        y='activity',
        x=count_col,
        orientation='h',
        labels=axis_labels,
        height=400,
    )
Example #4
0
def hist_counts(df_act, y_scale=None, idle=False, figsize=(9, 3)):
    """ Draw a horizontal bar chart of how often each activity occurs.

    Parameters
    ----------
    df_act : dataframe
        Activity data; the function works on a copy.
    y_scale : {None, 'log'}, default: None
        When 'log', the count axis uses a logarithmic scale.
    idle : bool, default: False
        When true, the copy is passed through ``add_idle`` before counting.
    figsize : (float, float), default: (9, 3)
        Forwarded to ``plt.subplots``.

    Returns
    -------
    fig
        The created matplotlib figure.
    """
    assert y_scale in [None, 'log']

    count_col = 'occurence'

    acts = df_act.copy()
    if idle:
        acts = add_idle(acts)

    counts = activities_count(acts)
    counts.reset_index(level=0, inplace=True)
    counts = counts.sort_values(by=[count_col], axis=0)

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Activity occurrences')
    plt.xlabel('counts')
    ax.barh(counts['activity'], counts[count_col])
    if y_scale == 'log':
        ax.set_xscale('log')
    return fig
Example #5
0
def hist_counts(df_acts=None,
                lst_acts=None,
                df_ac=None,
                y_scale="linear",
                idle=False,
                figsize=None,
                color=None,
                file_path=None):
    """
    Plot a bar chart displaying how often activities are occurring.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`. Required if *df_ac* is not given.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can be a
        subset of the recorded activities or contain activities that are not recorded.
        Only used when the statistic is computed from *df_acts*.
    df_ac : pd.DataFrame, optional
        A precomputed activity count statistic. If given, it is plotted directly
        and *df_acts*, *lst_acts* and *idle* are ignored. The passed object is
        not modified. Presumably the output of ``activities_count`` - verify
        against the caller.
    y_scale : {"log", "linear"}, default: linear
        The axis scale type to apply. *None* is accepted as an alias
        for "linear".
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        automatically from the number of bars.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plot import plot_activity_bar_count
    >>> plot_activity_bar_count(data.df_activities, idle=True);

    .. image:: ../_static/images/plots/act_bar_cnt.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing

    Raises
    ------
    AssertionError
        If neither *df_acts* nor *df_ac* is given, or if *y_scale* is not
        one of the supported values.
    """
    assert not (df_acts is None and df_ac is None), \
        'either df_acts or df_ac has to be provided'
    # "linear" is the documented default and must be accepted; None is kept
    # so that callers passing it explicitly continue to work.
    assert y_scale in (None, 'linear', 'log'), \
        'y_scale has to be one of "linear", "log" or None'

    title = 'Activity occurrences'
    col_label = 'occurrence'
    xlabel = 'counts'
    color = (get_primary_color() if color is None else color)

    # create the statistic if it doesn't exist
    if df_ac is None:
        df_acts = df_acts.copy()
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_count(df_acts, lst_acts=lst_acts)
    else:
        df = df_ac

    # prepare dataframe for plotting; reset_index returns a new frame here so
    # that a caller-supplied df_ac is left untouched
    df = df.reset_index(level=0)
    df = df.sort_values(by=[col_label], axis=0)

    # define plot modalities
    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # create plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[col_label], color=color)

    # the bars are horizontal, so the count scale lives on the x-axis
    if y_scale == 'log':
        ax.set_xscale('log')

    # save or return fig
    if file_path is not None:
        savefig(fig, file_path)
        return None
    return fig