def boxplot_duration(df_act, y_scale='norm', idle=False, figsize=(10, 8)):
    """
    Draw one horizontal box per activity showing the spread of its durations.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities; passed to ``activities_duration_dist``.
    y_scale : {'norm', 'log'}, default='norm'
        Only validated by this function. The duration axis is always drawn
        on a logarithmic scale, whichever value is given.
    idle : bool, default=False
        If set, ``add_idle`` is applied to *df_act* before the statistic
        is computed.
    figsize : (float, float), default=(10, 8)
        Width and height handed to ``plt.subplots``.

    Returns
    -------
    fig : Figure
        The figure created by ``plt.subplots``.
    """
    assert y_scale in ('norm', 'log')

    acts = add_idle(df_act) if idle else df_act
    durations = activities_duration_dist(acts)

    labels = durations['activity'].unique()
    # the statistic carries a 'minutes' column; the plot works in seconds
    durations['seconds'] = durations['minutes'] * 60

    # one series of durations per activity, in the order of `labels`
    samples = [
        durations.loc[durations['activity'] == label, 'seconds']
        for label in labels
    ]

    fig, ax = plt.subplots(figsize=figsize)
    ax.boxplot(samples, vert=False)
    ax.set_title('Activity durations')
    ax.set_yticklabels(labels, ha='right')
    ax.set_xlabel('log seconds')
    ax.set_xscale('log')

    # mirror the bottom axis on top (identity transform) so the same values
    # can be labelled through func_formatter_sec
    def _identity(value):
        return value

    top_axis = ax.secondary_xaxis('top', functions=(_identity, _identity))
    top_axis.xaxis.set_major_formatter(ticker.FuncFormatter(func_formatter_sec))
    return fig
def hist_cum_duration(df_act, y_scale=None, idle=False, figsize=(9, 3)):
    """
    Plot the cumulated duration of every activity as a horizontal bar chart.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities; passed to ``activities_durations``.
    y_scale : {None, 'log'}, default=None
        With 'log' the duration axis is logarithmic and labelled
        'log seconds'; otherwise it is left unchanged and labelled 'seconds'.
    idle : bool, default=False
        If set, ``add_idle`` is applied to *df_act* first.
    figsize : (float, float), default=(9, 3)
        Width and height handed to ``plt.subplots``.

    Returns
    -------
    fig : Figure
        The figure created by ``plt.subplots``.
    """
    assert y_scale in (None, 'log')
    use_log = y_scale == 'log'

    acts = add_idle(df_act) if idle else df_act

    # keep only the 'minutes' column, move the first index level into a
    # column and order the bars by duration
    durations = (
        activities_durations(acts)[['minutes']]
        .reset_index(level=0)
        .sort_values(by=['minutes'], axis=0)
    )
    # TODO change in activities duration to return time in seconds
    durations['seconds'] = durations['minutes'] * 60

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title('Cummulative activity durations')
    ax.set_xlabel('log seconds' if use_log else 'seconds')
    ax.barh(durations['activity'], durations['seconds'])
    if use_log:
        ax.set_xscale('log')

    # identical top axis whose ticks are rendered by func_formatter_sec
    def _identity(value):
        return value

    top_axis = ax.secondary_xaxis('top', functions=(_identity, _identity))
    top_axis.set_xlabel('time')
    top_axis.xaxis.set_major_formatter(ticker.FuncFormatter(func_formatter_sec))
    return fig
def heatmap_transitions(df_act, z_scale=None, figsize=(8, 6), idle=False):
    """
    Plot the table returned by ``activities_transitions`` as an annotated
    square heatmap.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities; passed to ``activities_transitions``.
    z_scale : {None, 'log'}, default=None
        'log' turns on the ``log`` flag of ``heatmap_square`` and
        ``annotate_heatmap``.
    figsize : (float, float), default=(8, 6)
        Width and height handed to ``plt.subplots``.
    idle : bool, default=False
        If set, ``add_idle`` is applied to *df_act* first.

    Returns
    -------
    fig : Figure
        The figure created by ``plt.subplots``.
    """
    assert z_scale in [None, 'log'], 'z-scale has to be either of type None or log'

    if idle:
        df_act = add_idle(df_act)

    transitions = activities_transitions(df_act)
    # the column names label both axes of the heatmap
    labels = list(transitions.columns)
    use_log = z_scale == 'log'

    fig, ax = plt.subplots(figsize=figsize)
    image, _ = heatmap_square(transitions.values, labels, labels, log=use_log,
                              ax=ax, cbarlabel='count')
    annotate_heatmap(image, textcolors=("white", "black"), log=use_log,
                     valfmt='{x:.0f}')
    ax.set_title('Activity transitions')
    return fig
def heatmap_contingency_overlaps(df_dev, df_act, figsize=(18, 12), z_scale='log', idle=False):
    """
    Hand the device/activity interval-overlap table to ``heatmap_contingency``.

    Parameters
    ----------
    df_dev : pd.DataFrame
        Recorded devices; passed to ``contingency_table_interval_overlaps``.
    df_act : pd.DataFrame
        Recorded activities; passed to ``contingency_table_interval_overlaps``.
    figsize : (float, float), default=(18, 12)
        Forwarded to ``heatmap_contingency``.
    z_scale : str, default='log'
        Forwarded to ``heatmap_contingency``.
    idle : bool, default=False
        If set, ``add_idle`` is applied to a copy of *df_act* first.

    Returns
    -------
    None
        The function has no return statement.
    """
    acts_df = add_idle(df_act.copy()) if idle else df_act

    table = contingency_table_interval_overlaps(acts_df, df_dev).reset_index()
    # merge the 'device' and 'val' columns into one "<device> <val>" row label
    table['index'] = table['device'] + ' ' + table['val'].astype(str)
    table = table.set_index('index').drop(['device', 'val'], axis=1)

    # convert time to seconds
    seconds = table.astype(int) / 1000000000

    def _format_tick(x, p):
        return func_formatter_sec(x, p)

    heatmap_contingency(
        seconds.values.T,
        seconds.columns,
        list(seconds.index),
        'second overlap',
        'Cross correlation activites',
        matplotlib.ticker.FuncFormatter(_format_tick),
        figsize,
        z_scale=z_scale,
    )
def hist_counts(df_act, y_scale=None, idle=False, figsize=(9, 3)):
    """
    Plot a horizontal bar chart of the 'occurence' value of every activity.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities; a copy is passed to ``activities_count``.
    y_scale : {None, 'log'}, default=None
        With 'log' the count axis is drawn logarithmically.
    idle : bool, default=False
        If set, ``add_idle`` is applied to the copy of *df_act* first.
    figsize : (float, float), default=(9, 3)
        Width and height handed to ``plt.subplots``.

    Returns
    -------
    fig : Figure
        The figure created by ``plt.subplots``.
    """
    assert y_scale in (None, 'log')
    count_col = 'occurence'

    acts = df_act.copy()
    if idle:
        acts = add_idle(acts)

    counts = activities_count(acts)
    # move the first index level into a regular column
    counts.reset_index(level=0, inplace=True)
    counts = counts.sort_values(by=[count_col], axis=0)

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title('Activity occurrences')
    ax.set_xlabel('counts')
    ax.barh(counts['activity'], counts[count_col])
    if y_scale == 'log':
        ax.set_xscale('log')
    return fig
def ridge_line(df_acts=None, lst_acts=None, df_act_dist=None, idle=False,
               n=1000, ylim_upper=None, color=None, figsize=None,
               file_path=None):
    """
    Plots the activity density distribution over one day.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can
        be a subset of the recorded activities or contain activities that are
        not recorded.
    df_act_dist : pd.DataFrame, optional
        A precomputed activity density distribution. If the *df_act_dist*
        parameter is given, parameters *df_acts*, *lst_acts*, *idle* and *n*
        are ignored.
    n : int, default=1000
        The number of monte-carlo samples to draw.
    ylim_upper: float, optional
        The offset from the top of the plot to the first ridge_line. Set this
        if the automatically determined value is not satisfying.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred by
        automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is
        used. Learn more about theming in the :ref:`user guide <theming>`
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None*
        instead of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_ridgeline
    >>> plot_activity_ridgeline(data.df_activities)

    .. image:: ../_static/images/plots/act_ridge_line.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.

    Raises
    ------
    ValueError
        If the distribution computed from *df_acts* is empty.
    """
    assert not (df_acts is None and df_act_dist is None)

    title = 'Activity distribution over one day'
    xlabel = 'day'
    color = (get_primary_color() if color is None else color)

    if df_act_dist is None:
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_dist(df_acts.copy(), lst_acts=lst_acts, n=n)
        if df.empty:
            raise ValueError(
                "no activity was recorded and no activity list was given.")
    else:
        df = df_act_dist

    def date_2_second(date):
        """ maps time onto seconds of a day

        Parameters
        ----------
        date : np.datetime64
            all the dates are on the day 1990-01-01

        Returns
        -------
        int
            seconds since midnight, or -1 for a missing value
        """
        if pd.isnull(date):
            return -1
        val = (date - np.datetime64('1990-01-01')) / np.timedelta64(1, 's')
        total_seconds = 60 * 60 * 24
        assert val <= total_seconds and val >= 0
        return int(val)

    # apply() returns a new frame, so a caller supplied df_act_dist is not
    # modified by the assignments below
    df = df.apply(np.vectorize(date_2_second))

    # sort every columns values ascending
    # NOTE(review): assigning the sorted Series back to the column realigns it
    # on the index, so this presumably leaves the row order unchanged. Kept
    # as is because the ordering requirements of `ridgeline` are not visible
    # here -- confirm and either drop this or sort the raw values.
    for col in df.columns:
        df[col] = df[col].sort_values()

    acts = tuple(df.columns)
    data = tuple(df[col].values for col in acts)
    num_act = len(acts)

    # infer visual properties
    figsize = (_num_items_2_ridge_figsize(num_act)
               if figsize is None else figsize)
    ylim_upper = (_num_items_2_ridge_ylimit(num_act)
                  if ylim_upper is None else ylim_upper)

    # plot the ridgeline
    fig, ax = plt.subplots(figsize=figsize)
    ridgeline(data, labels=acts, overlap=.85, fill=color, n_points=100,
              dist_scale=0.13)
    plt.title(title)
    plt.gca().spines['left'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.ylim((0, ylim_upper))
    plt.xlabel(xlabel)

    # x axis: one tick per hour, placed half a second before each full hour
    a = 0
    b = 60 * 60 * 24
    k = (b - a) / 24  # seconds per hour

    def func(x, p):
        # a tick at (h * k - 0.5) is labelled with the hour h, zero padded to
        # two digits ('01:00' ... '23:00')
        return '{:02d}:00'.format(int(x / k) + 1)

    plt.xlim((a, b))
    tcks_pos = np.arange(0, 23) * k + (-0.5 + k)
    x_locator = ticker.FixedLocator(tcks_pos)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(func))
    ax.xaxis.set_major_locator(x_locator)
    fig.autofmt_xdate(rotation=45)
    plt.grid(zorder=0)

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
def heatmap_transitions(df_acts=None, lst_acts=None, df_trans=None,
                        z_scale="linear", figsize=None, idle=False,
                        numbers=True, grid=True, cmap=None, file_path=None):
    """
    Plot the activity transition table as a square heatmap.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can
        be a subset of the recorded activities or contain activities that are
        not recorded.
    df_trans : pd.DataFrame
        A precomputed transition table. If the *df_trans* parameter is given,
        parameters *df_acts* and *lst_acts* are ignored. The transition table
        can be computed in :ref:`stats <stats_acts_trans>`.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred by
        automatically.
    z_scale : {"log", "linear", None}, default: "linear"
        The axis scale type to apply. *None* is treated like "linear".
    numbers : bool, default: True
        Whether to display numbers inside the heatmaps fields or not.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    cmap : str or Colormap, optional
        The Colormap instance or registered colormap name used to map scalar
        data to colors. When not set, the value of ``get_sequential_color()``
        is used.
    grid : bool, default: True
        determines whether to display a white grid, seperating the fields or
        not.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None*
        instead of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_hm_transitions
    >>> plot_activity_hm_transitions(data.df_activities)

    .. image:: ../_static/images/plots/act_hm_trans.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.
    """
    # "linear" is the declared default and therefore has to be accepted;
    # None is kept for callers that passed it explicitly
    assert z_scale in [None, 'linear', 'log'], \
        'z-scale has to be either None, linear or log'
    assert not (df_acts is None and df_trans is None)

    title = 'Activity transitions'
    z_label = 'count'

    if df_trans is None:
        df_acts = add_idle(df_acts) if idle else df_acts
        df = activities_transitions(df_acts, lst_acts=lst_acts)
    else:
        df = df_trans

    # the column names label both axes of the square heatmap
    act_lst = list(df.columns)
    num_act = len(act_lst)
    figsize = (_num_items_2_heatmap_square_figsize(num_act)
               if figsize is None else figsize)
    cmap = (get_sequential_color() if cmap is None else cmap)

    log = z_scale == 'log'
    valfmt = '{x:.0f}'

    # begin plotting
    fig, ax = plt.subplots(figsize=figsize)
    im, _ = heatmap_square(df.values, act_lst, act_lst, log=log, cmap=cmap,
                           ax=ax, cbarlabel=z_label, grid=grid)
    if numbers:
        annotate_heatmap(im, textcolors=("white", "black"), log=log,
                         valfmt=valfmt)
    ax.set_title(title)

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
def hist_cum_duration(df_acts=None, lst_acts=None, df_dur=None, y_scale=None,
                      idle=False, figsize=None, color=None, file_path=None):
    """
    Plots the cumulative duration for each activity in a bar plot.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can
        be a subset of the recorded activities or contain activities that are
        not recorded.
    df_dur : pd.DataFrame, optional
        Precomputed durations with the columns 'activity' and 'seconds'. If
        given, parameters *df_acts*, *lst_acts* and *idle* are ignored.
    y_scale : {"log", "linear", None}, default: None
        The axis scale type to apply. *None* is treated like "linear".
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred by
        automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is
        used. Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None*
        instead of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_bar_duration
    >>> plot_activity_bar_duration(data.df_activities)

    .. image:: ../_static/images/plots/act_bar_dur.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    # "linear" is a documented value and must not be rejected
    assert y_scale in [None, 'linear', 'log']
    assert not (df_acts is None and df_dur is None)

    title = 'Cummulative activity durations'
    xlabel = 'seconds'
    freq = 'seconds'  # time unit requested from the statistic and column plotted
    color = (get_primary_color() if color is None else color)

    if df_dur is None:
        if idle:
            df_acts = add_idle(df_acts.copy())
        df = activity_durations(df_acts, lst_acts=lst_acts, time_unit=freq)
    else:
        df = df_dur

    # sort_values returns a new frame; a caller supplied df_dur stays untouched
    df = df.sort_values(by=[freq], axis=0)

    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[freq], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # create secondary axis with time format 1s, 1m, 1d
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.set_xlabel('time')
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
def hist_counts(df_acts=None, lst_acts=None, df_ac=None, y_scale="linear",
                idle=False, figsize=None, color=None, file_path=None):
    """
    Plot a bar chart displaying how often activities are occurring.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can
        be a subset of the recorded activities or contain activities that are
        not recorded.
    df_ac : pd.DataFrame, optional
        Precomputed activity counts. If given, parameters *df_acts*,
        *lst_acts* and *idle* are ignored. The frame is not modified.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    y_scale : {"log", "linear", None}, default: linear
        The axis scale type to apply. *None* is treated like "linear".
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred by
        automatically.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is
        used. Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None*
        instead of returning the figure.

    Examples
    --------
    >>> from pyadlml.plot import plot_activity_bar_count
    >>> plot_activity_bar_count(data.df_activities, idle=True);

    .. image:: ../_static/images/plots/act_bar_cnt.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    assert not (df_acts is None and df_ac is None)
    # "linear" is the declared default and therefore has to be accepted;
    # None is kept for callers that passed it explicitly
    assert y_scale in [None, 'linear', 'log']

    title = 'Activity occurrences'
    col_label = 'occurrence'
    xlabel = 'counts'
    color = (get_primary_color() if color is None else color)

    # create statistics if the don't exists
    if df_ac is None:
        df_acts = df_acts.copy()
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_count(df_acts, lst_acts=lst_acts)
    else:
        df = df_ac

    # prepare dataframe for plotting; reset_index without inplace so that a
    # caller supplied df_ac is left untouched
    df = df.reset_index(level=0)
    df = df.sort_values(by=[col_label], axis=0)

    # define plot modalities
    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # create plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[col_label], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # save or return fig
    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
def boxplot_duration(df_acts, lst_acts=None, y_scale=None, idle=False,
                     figsize=None, file_path=None):
    """
    Plot a boxplot for activity durations.

    Parameters
    ----------
    df_acts : pd.DataFrame
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. The list can
        be a subset of the recorded activities or contain activities that are
        not recorded.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred by
        automatically.
    y_scale : {"log", "linear", None}, default: None
        The scale of the duration axis. *None* and "log" produce a
        logarithmic axis (the behaviour this function always had); "linear"
        leaves the axis linear.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None*
        instead of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_devices_bp_duration
    >>> plot_devices_bp_duration(data.df_activities)

    .. image:: ../_static/images/plots/act_bp.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    # "linear" is a documented value and must not be rejected
    assert y_scale in [None, 'linear', 'log']

    title = 'Activity durations'
    xlabel = 'seconds'

    if idle:
        df_acts = add_idle(df_acts)

    df = activities_duration_dist(df_acts, lst_acts=lst_acts)

    # select data for each activity
    activities = df['activity'].unique()
    df['seconds'] = df['minutes'] * 60

    num_act = len(activities)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    dat = [df.loc[df['activity'] == activity, 'seconds']
           for activity in activities]

    # plot boxsplot
    fig, ax = plt.subplots(figsize=figsize)
    ax.boxplot(dat, vert=False)
    ax.set_title(title)
    ax.set_yticklabels(activities, ha='right')
    ax.set_xlabel(xlabel)
    # only an explicit "linear" switches the log axis off, so existing calls
    # with None or "log" render exactly as before
    if y_scale != 'linear':
        ax.set_xscale('log')

    # create secondary axis with time format 1s, 1m, 1d
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
def ridge_line(df_act, t_range='day', idle=False, n=1000, dist_scale=0.05,
               ylim_upper=1.1, figsize=(10, 8)):
    """
    Show a ridgeline plot of the activity distribution over one day.

    The figure is displayed with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    df_act : pd.DataFrame
        Recorded activities; a copy is passed to ``activities_dist``.
    t_range : str, default='day'
        Forwarded to ``activities_dist``.
    idle : bool, default=False
        If set, ``add_idle`` is applied to *df_act* first.
    n : int, default=1000
        Forwarded to ``activities_dist``.
    dist_scale: float
        the scale of the distributions of a ridgeline.
    ylim_upper: float
        height that determines how many ridgelines are displayed. Adjust value
        to fit all the ridgelines into the plot
    figsize : (float, float), default=(10, 8)
        Width and height handed to ``plt.subplots``.
    """
    if idle:
        df_act = add_idle(df_act)

    def date_2_second(date):
        """ maps time onto seconds of a day

        Parameters
        ----------
        date : np.datetime64
            all the dates are on the day 1990-01-01

        Returns
        -------
        int
            seconds since midnight
        """
        val = (date - np.datetime64('1990-01-01')) / np.timedelta64(1, 's')
        total_seconds = 60 * 60 * 24
        assert val <= total_seconds and val >= 0
        return int(val)

    title = 'Activity distribution over one day'

    df = activities_dist(df_act.copy(), t_range, n)
    df = df.apply(np.vectorize(date_2_second))

    # sort every columns values ascending
    # NOTE(review): assigning the sorted Series back to the column realigns it
    # on the index, so this presumably leaves the row order unchanged. Kept
    # as is because the ordering requirements of `ridgeline` are not visible
    # here -- confirm and either drop this or sort the raw values.
    for col in df.columns:
        df[col] = df[col].sort_values()

    acts = tuple(df.columns)
    data = tuple(df[col].values for col in acts)

    fig, ax = plt.subplots(figsize=figsize)
    ridgeline(data, labels=acts, overlap=.85, fill='tab:blue', n_points=1000,
              dist_scale=dist_scale)
    plt.title(title)
    plt.gca().spines['left'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    # honour the ylim_upper parameter (previously hard coded to 1.1, which is
    # also the default, so default calls are unaffected)
    plt.ylim((0, ylim_upper))
    plt.xlabel('day')

    # x axis: one tick per hour, placed half a second before each full hour
    a = 0
    b = 60 * 60 * 24
    k = (b - a) / 24  # seconds per hour

    def func(x, p):
        # a tick at (h * k - 0.5) is labelled with the hour h, zero padded to
        # two digits; the old branch on int(x / k) < 10 rendered hour 10 as
        # '010:00'
        return '{:02d}:00'.format(int(x / k) + 1)

    plt.xlim((a, b))
    tcks_pos = np.arange(0, 23) * k + (-0.5 + k)
    x_locator = ticker.FixedLocator(tcks_pos)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(func))
    ax.xaxis.set_major_locator(x_locator)
    fig.autofmt_xdate(rotation=45)
    plt.grid(zorder=0)
    plt.show()
def heatmap_contingency_overlaps(df_devs=None, df_acts=None, df_con_tab=None,
                                 figsize=None, z_scale='log', idle=False,
                                 numbers=True, file_path=None):
    """
    Plots a heatmap of the time device states and activities have in common.

    Parameters
    ----------
    df_devs : pd.DataFrame, optional
        recorded devices from a dataset. For more information refer to
        :ref:`user guide<device_dataframe>`. If the parameter *df_devs* is not
        set, the parameter *df_con_tab* has to be set.
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`. If the parameter *df_acts* is
        not set, the parameter *df_con_tab* has to be set.
    df_con_tab : pd.DataFrame, optional
        A precomputed contingency table. If the *df_con_tab* parameter is
        given, parameters *df_acts* and *df_devs* are ignored. The contingency
        table can be computed in :ref:`stats <stats_dna_con_dur>`.
    figsize : (float, float), optional
        width, height in inches; forwarded to ``heatmap_contingency``.
    z_scale : {"log", "linear"}, default: 'log'
        The axis scale type; forwarded to ``heatmap_contingency``.
    numbers : bool, default: True
        Whether to display numbers inside the heatmaps fields or not;
        forwarded to ``heatmap_contingency``.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    file_path : str, optional
        NOTE(review): accepted but never read by the code of this function.

    Examples
    --------
    >>> from pyadlml.plot import plot_hm_contingency_duration
    >>> plot_hm_contingency_duration(data.df_devices, data.df_activities)

    .. image:: ../_static/images/plots/cont_hm_duration.png
       :height: 300px
       :width: 800 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    None
        NOTE(review): the function has no return statement; whatever
        ``heatmap_contingency`` returns is discarded.
    """
    assert df_con_tab is not None or (df_devs is not None and df_acts is not None)

    if df_con_tab is not None:
        table = df_con_tab
    else:
        if idle:
            df_acts = add_idle(df_acts.copy())
        table = contingency_duration(df_devs, df_acts)

    # convert time (ns) to seconds
    table = table.astype(int) / 1000000000

    def _row_label(label):
        # labels containing "Off" are kept, every other label becomes "On"
        return label if "Off" in label else "On"

    # rename labels
    table = table.reset_index(drop=False)
    table['index'] = table['index'].apply(_row_label)
    table = table.set_index('index')

    def _format_seconds(x, p):
        return func_formatter_seconds2time(x, p)

    heatmap_contingency(
        table.columns.values,
        list(table.index),
        table.values.T,
        'Mutual time: activities vs. devices',
        'mutual time in seconds',
        valfmt=matplotlib.ticker.FuncFormatter(_format_seconds),
        figsize=figsize,
        z_scale=z_scale,
        numbers=numbers,
    )