def averages_by_period_table(data, variable, freq='M', normalize: bool = False,
                             age_group_size=10, gradient_axis=1):
    """Build a styled table of per-period averages broken down by age group.

    The ``variable`` selection of ``data`` loses its 'unknown' column, is
    regrouped through ``ic.aggregate_age_groups`` and is then averaged per
    period through ``ic.average_by_period``. The caption and the axis names
    of the resulting table are in Italian.

    Args:
        data: Table indexable by ``variable``; the selected part must have
            an 'unknown' column.
        variable: 'cases' selects the "casi" wording in the caption; any
            other value is labelled "decessi".
        freq: Forwarded to ``ic.average_by_period``. 'M' gets the monthly
            wording, anything else is described as periods of ``freq`` days.
        normalize: When true, every row is divided by its own total and the
            cells are rendered as percentages instead of whole numbers.
        age_group_size: Forwarded to ``ic.aggregate_age_groups``.
        gradient_axis: ``axis`` argument of ``Styler.background_gradient``.

    Returns:
        The styled table (formatted cells, background gradient, caption).
    """
    table = ic.average_by_period(
        ic.aggregate_age_groups(
            data[variable].drop(columns='unknown'),
            age_group_size,
        ),
        freq=freq,
    )

    cell_format = '{:.0f}'
    if normalize:
        # Turn each row into shares of that row's total.
        table = table.div(table.sum(axis=1), axis=0)
        cell_format = '{:.1%}'

    if variable == 'cases':
        label = 'casi'
    else:
        label = 'decessi'

    monthly = freq == 'M'
    index_name = 'Mese' if monthly else 'Data di fine periodo'
    freq_label = 'mese per mese' if monthly else f'periodi di {freq} giorni'
    caption = f'Numero medio di {label} giornalieri ({freq_label})'

    table.columns = table.columns.rename('Età')
    table.index = table.index.rename(index_name)

    styled = table.style.format(cell_format)
    styled = styled.background_gradient(axis=gradient_axis)
    return styled.set_caption(caption)
def double_area_chart_of_running_averages(
    data: pd.DataFrame,
    variable: str = "cases",
    *,
    strings: Translation,
    window: int = 14,
    age_group_size: int = 20,
    period: Optional[Period] = None,
    **figure_args,
):
    """Draw two stacked area charts of the running average by age group.

    The upper chart shows the values themselves, the lower one the same data
    drawn with ``normalize=True``.

    Args:
        data: Table whose ``variable`` selection has an 'unknown' column.
        variable: Name of the selection to plot; validated by
            ``check_variable``.
        strings: Translation supplying the chart title and the ``lang``
            passed on to ``area_chart``.
        window: Number of rows over which differences are taken; the
            differences are divided by it when it is greater than 1.
        age_group_size: ``cuts`` argument of ``ic.aggregate_age_groups``;
            validated by ``check_age_group_size``.
        period: Unpacked into ``slice(...)`` to select rows. When falsy, the
            selection starts at the first index label of ``data``.
        **figure_args: Forwarded to ``plt.subplots``; ``figsize`` falls back
            to ``DOUBLE_CHART_FIGSIZE`` when not given.

    Returns:
        The figure holding both charts.
    """
    check_variable(variable)
    check_age_group_size(age_group_size)

    if period:
        selection = slice(*period)
    else:
        selection = slice(data.index[0], None)

    daily = resample_if_needed(data[variable].drop(columns="unknown"), "D")

    # Difference over `window` rows, then skip the first `window` rows,
    # which have no earlier row to be compared with.
    increments = daily[selection].diff(window).iloc[window:]
    averages = ic.aggregate_age_groups(
        increments,
        cuts=age_group_size,
        fmt_last="{}+",
    )
    if window > 1:
        averages = averages / window

    # Caller-supplied keyword arguments take precedence over the default size.
    subplot_kwargs = {"figsize": DOUBLE_CHART_FIGSIZE, **figure_args}
    fig, (top, bottom) = plt.subplots(2, 1, **subplot_kwargs)

    area_chart(averages, ax=top, lang=strings.lang)
    area_chart(averages, ax=bottom, normalize=True, lang=strings.lang)

    title = strings.get(f"running_average_{variable}_title", count=window)
    top.set_title(title)
    return fig
def double_area_chart_of_cumulative_counts(
    data: pd.DataFrame,
    variable: str = "cases",
    *,
    age_group_size: int = 20,
    period: Optional[Period] = None,
    strings: Translation,
    **figure_args,
) -> plt.Figure:
    """Draw two stacked area charts of the counts by age group.

    Not a very interesting chart. The upper chart shows the values
    themselves, the lower one the same data drawn with ``normalize=True``.

    Args:
        data: DataFrame having 'cases' and/or 'deaths' as first-level
            columns (e.g. ``ic.get()``).
        variable: Name of the first-level column to plot; validated by
            ``check_variable``.
        age_group_size: ``cuts`` argument of ``ic.aggregate_age_groups``;
            validated by ``check_age_group_size``.
        period: Unpacked into ``slice(...)`` to select rows. When falsy, the
            selection starts at the first index label of ``data``.
        strings: Translation supplying the title (key ``title.<variable>``)
            and the ``lang`` passed on to ``area_chart``.
        **figure_args: Forwarded to ``plt.subplots``; ``figsize`` defaults
            to ``DOUBLE_CHART_FIGSIZE``.

    Returns:
        The figure holding both charts.
    """
    check_variable(variable)
    check_age_group_size(age_group_size)

    period_slice: slice = (
        slice(*period) if period else slice(data.index[0], None)
    )

    # Resample for a smoother graph.
    d = resample_if_needed(data[variable], "D", hour=18, method="pchip")
    d = ic.aggregate_age_groups(
        d[period_slice].drop(columns="unknown"),
        cuts=age_group_size,
        fmt_last="{}+",
    )

    figure_args.setdefault("figsize", DOUBLE_CHART_FIGSIZE)
    fig, ax = plt.subplots(2, 1, **figure_args)

    title = strings[f"title.{variable}"]
    ax[0].set_title(title)
    area_chart(d, ax=ax[0], lang=strings.lang)
    area_chart(d, normalize=True, ax=ax[1], lang=strings.lang)

    # The signature promises a Figure; previously the function fell off the
    # end and handed callers None.
    return fig
def __init__(
    self,
    counts: pd.DataFrame,
    variable: str = "cases",
    *,
    strings: Translation = NullTranslation(),
    normalize: bool = True,
    age_group_size: int = 10,
    window: int = 14,
    population_distribution: Optional[pd.Series] = None,
    ax: Optional[plt.Axes] = None,
    resample_kwargs: Optional[Dict[str, Any]] = None,
):
    """
    Shows the age distribution of cases/deaths at a given date.

    Usable either to draw a static chart or to generate an animation.
    The bars are created with zero height; the prepared table is kept in
    ``self.data``.

    Args:
        counts: Table whose ``variable`` selection has an 'unknown' column.
        variable: Name of the selection to plot.
        strings: Translation supplying the labels and the title.
        normalize: When true, every row is divided by its own total and
            the y axis is formatted as percentages.
        age_group_size: ``cuts`` argument of ``ic.aggregate_age_groups``.
        window: ``window`` argument of ``ic.running_average``; also passed
            as ``count`` when building the title.
        population_distribution: Optional reference distribution, drawn as
            hatched bars behind the data (only when ``normalize`` is true).
        ax: Axes to draw on; the current axes are used when omitted.
        resample_kwargs: Extra keyword arguments for
            ``ic.running_average``; ``None`` means no extra arguments.
    """
    # A fresh dict per call instead of a dict shared through the signature.
    if resample_kwargs is None:
        resample_kwargs = {}
    if ax is None:
        ax = plt.gca()
    self.ax = ax
    s = strings

    data = counts[variable].drop(columns="unknown")
    data = ic.running_average(data, window=window, **resample_kwargs)
    data = ic.aggregate_age_groups(data, cuts=age_group_size)
    if normalize:
        # Express every row as shares of that row's total.
        data = data.divide(data.sum(axis=1), axis=0)
        ax.yaxis.set_major_formatter(
            mpl.ticker.PercentFormatter(xmax=1.0, decimals=0))
    self.data = data

    show_population = normalize and population_distribution is not None

    if show_population:
        population = ic.aggregate_age_groups(population_distribution,
                                             cuts=age_group_size)
        sns.barplot(
            ax=ax,
            label=s["istat_population_data_label"],
            x=population.index,
            y=population,
            facecolor='#a1c9f4',
            hatch="/",
            edgecolor="white",
        )

    age_groups = data.columns
    label = s[f"{variable}_label"]
    # Bars start empty; their heights are expected to be set later from
    # self.data (the code doing so is not part of this method).
    self.bars = ax.bar(
        x=age_groups,
        height=[0] * len(age_groups),
        label=label,
        facecolor='#4878d0',
        alpha=0.7,
    )
    self.labels, self.update_labels = add_labels_to_bars(
        self.bars,
        fmt='{:.0%}' if normalize else '{:n}',
        fontsize=10,
        ax=ax,
        color='white',
        bbox=dict(
            boxstyle=mpl.patches.BoxStyle("Round", pad=0.3),
            color='black',
            alpha=.35,
        ),
    )

    # Fixed y range with 5% headroom over the largest value in the table.
    ymax = 1.05 * data.max().max()
    ax.set_ylim(0, ymax)
    ax.set_xticklabels(age_groups)
    ax.set_xlabel(s["age"])
    ax.set_ylabel("")
    ax.grid(False, which='both', axis='x')

    if normalize:
        title = s.get(f"running_{variable}_age_distribution", count=window)
    else:
        title = s.get(f"running_average_{variable}_title", count=window)
    ax.set_title(title, fontsize=14)

    if show_population:
        # NOTE(review): the formatter was already set above; it is applied
        # again here exactly as in the original, presumably because the
        # seaborn call may have touched the axis -- confirm before removing.
        ax.yaxis.set_major_formatter(
            mpl.ticker.PercentFormatter(xmax=1.0, decimals=0))
        ax.legend()

    self.date_text = ax.text(
        0.5,
        0.93,
        "",
        ha="center",
        va="center",
        transform=ax.transAxes,
        color=(1, 1, 1, 0.9),
        bbox=dict(
            boxstyle=mpl.patches.BoxStyle("Round", pad=0.4),
            color=(0, 0, 0, 0.30),
        ),
        fontsize=14,
        fontweight="semibold",
    )
    self.artists = [self.date_text, *self.bars, *self.labels]
def average_by_period_bar_chart(
    counts: pd.DataFrame,
    variable: str,
    *,
    strings: Translation,
    freq: Union[str, int] = 7,
    normalize: bool = False,
    age_group_size: int = 20,
    stacked: bool = True,
    ylim: Optional[float] = None,
    ax: Optional[plt.Axes] = None,
    figsize: Tuple[float, float] = (12, 7),
) -> plt.Axes:
    """Draw a bar chart of per-period values broken down by age group.

    The ``variable`` selection of ``counts`` loses its 'unknown' column, is
    regrouped through ``ic.aggregate_age_groups`` and is then reduced per
    period through ``ic.count_by_period``.

    Args:
        counts: Table whose ``variable`` selection has an 'unknown' column.
        variable: Name of the selection to plot; also used to look up the
            title in ``strings``.
        strings: Translation supplying the axis labels, legend title and
            chart title.
        freq: Forwarded to ``ic.count_by_period``. 'M' gets month labels on
            the x axis; 'M', 'W', 'W-SUN' and integers each get a matching
            period description in the title. Any other value yields a
            title without a period description.
        normalize: When true, every row is divided by its own total and the
            y axis is formatted as percentages.
        age_group_size: Forwarded to ``ic.aggregate_age_groups``.
        stacked: Forwarded to ``DataFrame.plot.bar``; with ``normalize`` it
            also pins the y range to [0, 1].
        ylim: Optional factor applied to the current upper y limit.
        ax: Axes to draw on; a new figure of size ``figsize`` is created
            when omitted.
        figsize: Size of the figure created when ``ax`` is omitted.

    Returns:
        The axes the bars were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # Data preparation
    d = counts[variable].drop(columns='unknown')
    d = ic.aggregate_age_groups(d, age_group_size)
    d = ic.count_by_period(d, freq=freq)
    if normalize:
        d = d.div(d.sum(axis=1), axis=0)

    # Plot
    ax = d.plot.bar(stacked=stacked, cmap=DEFAULT_CMAP, ax=ax, width=.85)
    legend(ax=ax, title=strings['age'])

    # Axes setup
    if freq == 'M':
        xlabel = strings['month']
        date_fmt = '%B'
        xtick_rotation = 0
    else:
        xlabel = strings['period_end_date']
        date_fmt = '%d %b'
        xtick_rotation = 45
    ax.set_xticklabels(d.index.strftime(date_fmt), rotation=xtick_rotation)
    ax.set_xlabel(xlabel)

    if normalize:
        if stacked:
            ax.set_ylim([0, 1.0])
        ax.yaxis.set_major_formatter(
            mpl.ticker.PercentFormatter(xmax=1.0, decimals=0))
    else:
        ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:n}'))

    if ylim:
        # `ylim` scales the current upper limit rather than replacing it.
        ax.set_ylim(0, ylim * ax.get_ylim()[1])

    # Title
    base_title = strings[f'{variable}_age_distribution' if normalize
                         else f'average_daily_{variable}']
    if freq == 'M':
        period_label = strings['month_by_month']
    elif freq in {'W', 'W-SUN'}:
        period_label = strings['week_by_week']
    elif isinstance(freq, int):
        period_label = strings.get('by_periods_of_n_days', count=freq)
    else:
        # Frequencies without a dedicated wording used to leave
        # `period_label` unbound and crash after the chart was drawn.
        period_label = None

    if period_label is None:
        title = base_title
    else:
        title = f"{base_title} {period_label}"
    ax.set_title(title)

    return ax