Exemple #1
0
    def test_stats_activities(self):
        """Check the result sizes of the activity statistics for each fixture.

        Iterates in lockstep over ``self.len_activities``,
        ``self.num_activities``, ``self.df_activity_attrs`` and
        ``self.lst_activity_attrs``. For every entry the activity dataframe
        and the activity list are looked up on ``self.data`` by attribute
        name. Each statistic is then called twice: once with the activity
        list (the result is expected to have one entry per listed activity)
        and once without it (the result is expected to have ``num_acts``
        entries).
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        fixtures = zip(self.len_activities, self.num_activities,
                       self.df_activity_attrs, self.lst_activity_attrs)

        # the first tuple element (len_activities) is not needed by any check
        for _len_acts, expected_recorded, df_attr_name, lst_attr_name in fixtures:
            df_acts = getattr(self.data, df_attr_name)
            lst_acts = getattr(self.data, lst_attr_name)

            # counts
            counts = activities_count(df_acts, lst_acts)
            assert len(counts) == len(lst_acts)
            counts = activities_count(df_acts)
            assert len(counts) == expected_recorded

            # transitions: the entries are expected to sum to len(df) - 1
            transitions = activities_transitions(df_acts, lst_acts)
            assert len(transitions) == len(lst_acts)
            assert transitions.values.sum() == len(df_acts) - 1
            transitions = activities_transitions(df_acts)
            assert len(transitions) == expected_recorded

            # durations
            durations = activity_durations(df_acts, lst_acts)
            assert len(durations) == len(lst_acts)
            durations = activity_durations(df_acts)
            assert len(durations) == expected_recorded

            # distribution: one column per activity, one row per drawn sample
            distribution = activities_dist(df_acts, lst_acts, n=100)
            assert len(distribution.columns) == len(lst_acts)
            assert len(distribution) == 100
            distribution = activities_dist(df_acts, n=100)
            assert len(distribution.columns) == expected_recorded
            assert len(distribution) == 100
Exemple #2
0
    def test_stats_activities(self):
        """Check the result sizes of the activity statistics for one dataset.

        Uses ``self.data.df_activities_admin`` together with
        ``self.data.lst_activities``. Each statistic is called once with the
        activity list (the result is expected to have one entry per listed
        activity) and once without it (the result is expected to have
        exactly two entries).
        """
        from pyadlml.dataset.stats.activities import (
            activities_count,
            activities_dist,
            activities_transitions,
            activity_durations,
        )

        df_acts = self.data.df_activities_admin
        lst_acts = self.data.lst_activities

        # counts
        counts = activities_count(df_acts, lst_acts)
        assert len(counts) == len(lst_acts)
        counts = activities_count(df_acts)
        assert len(counts) == 2

        # transitions: the entries are expected to sum to len(df) - 1
        transitions = activities_transitions(df_acts, lst_acts)
        assert len(transitions) == len(lst_acts)
        assert transitions.values.sum() == len(df_acts) - 1
        transitions = activities_transitions(df_acts)
        assert len(transitions) == 2

        # durations
        durations = activity_durations(df_acts, lst_acts)
        assert len(durations) == len(lst_acts)
        durations = activity_durations(df_acts)
        assert len(durations) == 2

        # distribution: one column per activity, one row per drawn sample
        distribution = activities_dist(df_acts, lst_acts, n=100)
        assert len(distribution.columns) == len(lst_acts)
        assert len(distribution) == 100
        distribution = activities_dist(df_acts, n=100)
        assert len(distribution.columns) == 2
        assert len(distribution) == 100
Exemple #3
0
def ridge_line(df_act, t_range='day', n=1000):
    """
    Build a plotly figure with one horizontal half-violin per activity.

    See https://plotly.com/python/violin/ for the plot type.

    The samples come from ``activities_dist``, which is called on a copy of
    *df_act*. Every column of its result is drawn as one violin trace, named
    after the column and coloured along a gradient from
    ``rgb(5, 200, 200)`` to ``rgb(200, 10, 10)``.

    Parameters
    ----------
    df_act : pd.DataFrame
        Activity dataframe; passed to ``activities_dist`` as a copy.
    t_range : str, default='day'
        Forwarded unchanged to ``activities_dist``.
    n : int, default=1000
        Forwarded unchanged to ``activities_dist`` (presumably the number of
        samples drawn per activity -- confirm against ``activities_dist``).

    Returns
    -------
    fig : plotly.graph_objects.Figure
    """
    dist = activities_dist(df_act.copy(), t_range, n)

    palette = n_colors('rgb(5, 200, 200)',
                       'rgb(200, 10, 10)',
                       len(dist.columns),
                       colortype='rgb')
    # transpose so that each row holds the samples of one activity
    samples_per_activity = dist.values.T

    figure = go.Figure()
    for idx, (samples, line_color) in enumerate(
            zip(samples_per_activity, palette)):
        violin = go.Violin(x=samples,
                           line_color=line_color,
                           name=dist.columns[idx])
        figure.add_trace(violin)

    figure.update_traces(orientation='h', side='positive', width=3,
                         points=False)
    figure.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
    return figure
Exemple #4
0
def ridge_line(df_acts=None,
               lst_acts=None,
               df_act_dist=None,
               idle=False,
               n=1000,
               ylim_upper=None,
               color=None,
               figsize=None,
               file_path=None):
    """
    Plots the activity density distribution over one day.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can be a
        subset of the recorded activities or contain activities that are not recorded.
    df_act_dist : pd.DataFrame, optional
        A precomputed activity density distribution. If the *df_act_dist* parameter is
        given, parameters *df_acts*, *lst_acts*, *idle* and *n* are ignored. The
        distribution can be computed with ``activities_dist``.
    n : int, default=1000
        The number of monte-carlo samples to draw.
    ylim_upper: float, optional
        The offset from the top of the plot to the first ridge_line. Set this if
        the automatically determined value is not satisfying.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    file_path : str, optional
        If set, saves the plot under the given file path and returns *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_ridgeline
    >>> plot_activity_ridgeline(data.df_activities)

    .. image:: ../_static/images/plots/act_ridge_line.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.

    Raises
    ------
    ValueError
        If neither *df_acts* nor *df_act_dist* is given, or if the computed
        distribution is empty.
    """
    # explicit check instead of ``assert`` so it also holds under ``python -O``
    if df_acts is None and df_act_dist is None:
        raise ValueError("either df_acts or df_act_dist has to be given.")

    title = 'Activity distribution over one day'
    xlabel = 'day'
    seconds_per_day = 60 * 60 * 24
    seconds_per_hour = seconds_per_day / 24
    color = (get_primary_color() if color is None else color)

    if df_act_dist is None:
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_dist(df_acts.copy(), lst_acts=lst_acts, n=n)
        if df.empty:
            raise ValueError(
                "no activity was recorded and no activity list was given.")
    else:
        df = df_act_dist

    def date_2_second(date):
        """ maps time onto seconds of a day

        Parameters
        ----------
        date : np.datetime64
            all the dates are on the day 1990-01-01

        Returns
        -------
        int
            seconds elapsed since 1990-01-01 00:00:00, or -1 for a null date
        """
        if pd.isnull(date):
            return -1
        val = (date - np.datetime64('1990-01-01')) / np.timedelta64(1, 's')
        assert 0 <= val <= seconds_per_day
        return int(val)

    # ``apply`` builds a new frame, so a passed-in df_act_dist is not mutated
    df = df.apply(np.vectorize(date_2_second))
    # Sort every column's values ascending. The raw values are assigned
    # because a sorted Series would be re-aligned on the index by pandas,
    # which silently restores the original order.
    for col in df.columns:
        df[col] = df[col].sort_values().values

    grouped = [(col, df[col].values) for col in df.columns]
    acts, data = zip(*grouped)
    num_act = len(acts)

    # infer visual properties
    figsize = (_num_items_2_ridge_figsize(num_act)
               if figsize is None else figsize)
    ylim_upper = (_num_items_2_ridge_ylimit(num_act)
                  if ylim_upper is None else ylim_upper)

    # plot the ridgeline
    fig, ax = plt.subplots(figsize=figsize)
    ridgeline(data,
              labels=acts,
              overlap=.85,
              fill=color,
              n_points=100,
              dist_scale=0.13)
    plt.title(title)

    current_ax = plt.gca()
    for side in ('left', 'right', 'top'):
        current_ax.spines[side].set_visible(False)
    plt.ylim((0, ylim_upper))
    plt.xlabel(xlabel)

    # set xaxis labels
    def hour_label(x, pos):
        # Ticks sit half a second before each full hour, so truncating and
        # adding one yields that hour; ``02d`` pads single-digit hours.
        return '{:02d}:00'.format(int(x / seconds_per_hour) + 1)

    plt.xlim((0, seconds_per_day))
    # one tick per hour from 01:00 to 23:00
    tcks_pos = np.arange(0, 23) * seconds_per_hour + (-0.5 + seconds_per_hour)

    x_locator = ticker.FixedLocator(tcks_pos)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(hour_label))
    ax.xaxis.set_major_locator(x_locator)
    fig.autofmt_xdate(rotation=45)

    plt.grid(zorder=0)

    if file_path is not None:
        savefig(fig, file_path)
        return None
    return fig
Exemple #5
0
def ridge_line(df_act,
               t_range='day',
               idle=False,
               n=1000,
               dist_scale=0.05,
               ylim_upper=1.1,
               figsize=(10, 8)):
    """
    Plots the activity distribution over one day as a ridgeline and shows it.

    Parameters
    ----------
    df_act : pd.DataFrame
        activity dataframe; a copy is passed to ``activities_dist``.
    t_range : str, default='day'
        forwarded unchanged to ``activities_dist``.
    idle : bool, default=False
        if set, *df_act* is first passed through ``add_idle``.
    n : int, default=1000
        forwarded unchanged to ``activities_dist`` (presumably the number of
        samples to draw -- confirm against ``activities_dist``).
    dist_scale: float
        the scale of the distributions of a ridgeline.
    ylim_upper: float
        height that determines how many ridgelines are displayed. Adjust value to fit all
        the ridgelines into the plot
    figsize : (float, float), default=(10, 8)
        passed to ``plt.subplots``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    if idle:
        df_act = add_idle(df_act)

    seconds_per_day = 60 * 60 * 24
    seconds_per_hour = seconds_per_day / 24

    def date_2_second(date):
        """ maps time onto seconds of a day

        Parameters
        ----------
        date : np.datetime64
            all the dates are on the day 1990-01-01

        Returns
        -------
        int
            seconds elapsed since 1990-01-01 00:00:00
        """
        val = (date - np.datetime64('1990-01-01')) / np.timedelta64(1, 's')
        assert 0 <= val <= seconds_per_day
        return int(val)

    title = 'Activity distribution over one day'

    df = activities_dist(df_act.copy(), t_range, n)
    df = df.apply(np.vectorize(date_2_second))
    # Sort every column's values ascending. The raw values are assigned
    # because a sorted Series would be re-aligned on the index by pandas,
    # which silently restores the original order.
    for col in df.columns:
        df[col] = df[col].sort_values().values

    grouped = [(col, df[col].values) for col in df.columns]

    fig, ax = plt.subplots(figsize=figsize)
    acts, data = zip(*grouped)
    ridgeline(data,
              labels=acts,
              overlap=.85,
              fill='tab:blue',
              n_points=1000,
              dist_scale=dist_scale)
    plt.title(title)

    current_ax = plt.gca()
    for side in ('left', 'right', 'top'):
        current_ax.spines[side].set_visible(False)
    # honour the caller's limit; this was previously hard-coded to 1.1
    plt.ylim((0, ylim_upper))
    plt.xlabel('day')

    # set xaxis labels
    def hour_label(x, pos):
        # Ticks sit half a second before each full hour, so truncating and
        # adding one yields that hour; ``02d`` pads single-digit hours only
        # (the previous check rendered 10 o'clock as '010:00').
        return '{:02d}:00'.format(int(x / seconds_per_hour) + 1)

    plt.xlim((0, seconds_per_day))
    # one tick per hour from 01:00 to 23:00
    tcks_pos = np.arange(0, 23) * seconds_per_hour + (-0.5 + seconds_per_hour)

    x_locator = ticker.FixedLocator(tcks_pos)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(hour_label))
    ax.xaxis.set_major_locator(x_locator)
    fig.autofmt_xdate(rotation=45)

    plt.grid(zorder=0)
    plt.show()