Esempio n. 1
0
    def plot_areas_society_progress(ax: plt.axis, time_array: List,
                                    society_snapshot: Dict,
                                    society_progress: Dict):
        """Draw one shaded band per ``Status`` member and annotate its size.

        For every status, the snapshot value is appended to that status' list
        in ``society_progress`` (the dict's lists are mutated in place). The
        area between the previous status' series and this status' series is
        then filled over ``time_array``, and the difference between the two at
        the last time point is written at the band's vertical midpoint.

        Args:
            ax: object to draw on; must provide ``set_xlabel``, ``set_ylabel``,
                ``fill_between`` and ``text``.
            time_array: x values of the bands; its last entry positions the
                text annotation.
            society_snapshot: maps each status name to its current value.
            society_progress: maps each status name to the list of values
                recorded so far; extended by one entry per status.
        """
        ax.set_xlabel('time [days]')
        ax.set_ylabel('population percentage')

        earlier = None
        for status in Status:
            name = status.name
            series = society_progress[name]
            latest = society_snapshot[name]
            series.append(latest)

            # The first band rests on zero; every later band rests on the
            # series of the status handled just before it.
            floor = society_progress[earlier.name] if earlier else [0]
            floor_now = floor[-1]

            ax.fill_between(x=time_array,
                            y1=floor,
                            y2=series,
                            color=status.value,
                            label=name,
                            alpha=0.25)

            # Label the band with its current height, centred vertically.
            ax.text(x=time_array[-1],
                    y=0.5 * (floor_now + latest),
                    s=r"{0:.2f}".format(latest - floor_now),
                    size=10,
                    color=status.value)

            earlier = status
Esempio n. 2
0
def plot_percentiles(
        ax: plt.axis, df: pd.DataFrame, percentiles: Tuple[float, float],
        alpha: float) -> None:
    """Shade the region between two row-wise quantiles of ``df`` on ``ax``.

    Args:
        ax: object to draw on; must provide ``fill_between``.
        df: data whose index supplies the x values; quantiles are taken
            across the columns of each row (``axis=1``).
        percentiles: ``(low, high)`` quantile levels, each expressed as a
            fraction in ``[0, 1]``.
        alpha: opacity of the shaded region, in ``[0, 1]``.

    Raises:
        AssertionError: if ``alpha`` or either quantile level lies outside
            ``[0, 1]``, or ``percentiles`` does not have exactly two entries.
    """
    assert 0 <= alpha <= 1

    assert len(percentiles) == 2 and all(
        0 <= level <= 1 for level in percentiles
    ), "percentiles must be between 0 and 1"

    low_level = percentiles[0]
    high_level = percentiles[1]

    lower_band = df.quantile(low_level, axis=1)
    upper_band = df.quantile(high_level, axis=1)
    ax.fill_between(df.index, lower_band, upper_band, alpha=alpha)
Esempio n. 3
0
    def _plot_summary_on_axis(
        self,
        ax: plt.axis,
        label_y_axis: bool,
        use_title: bool,
    ) -> None:
        """Plot each group's accuracy curve averaged across paradigms on ``ax``.

        Used to plot the summary on a multi-axis figure, or in a standalone
        figure.

        For every group, the curves of each replication are first averaged
        across paradigms; the mean over replications is then drawn together
        with a shaded margin of error. Flat reference lines at the mean
        RoBERTa-base accuracy and the mean frequency-baseline accuracy are
        drawn alongside.

        Args:
            ax: axis to draw on.
            label_y_axis: if True, draw the y-axis label and ticks; otherwise
                leave the y-axis without label and ticks.
            use_title: if True, title the axis 'Average' and print the final
                accuracies; if False, prefix the y-axis label with 'Average'
                instead and print nothing.
        """

        # axis
        if use_title:
            ax.set_title('Average', fontsize=configs.Figs.title_font_size)
            y_axis_label = self.y_axis_label
        else:
            y_axis_label = f'Average {self.y_axis_label}'
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        ax.set_ylim(self.y_lims)

        # x-axis: a single tick at the last step
        ax.set_xticks([self.last_step])
        ax.set_xticklabels([shorten_tick_label(self.last_step)],
                           fontsize=configs.Figs.tick_font_size)
        ax.set_xlabel(self.x_axis_label, fontsize=configs.Figs.ax_font_size)

        # y axis
        if label_y_axis:
            ax.set_ylabel(y_axis_label, fontsize=configs.Figs.ax_font_size)
            ax.set_yticks(self.y_ticks)
            ax.set_yticklabels(self.y_ticks,
                               fontsize=configs.Figs.tick_font_size)
        else:
            ax.set_ylabel('', fontsize=configs.Figs.ax_font_size)
            ax.set_yticks([])
            ax.set_yticklabels([], fontsize=configs.Figs.tick_font_size)

        # collect curves for each replication across all paradigms
        gn2rep2curves_by_pd = defaultdict(dict)
        for paradigm in self.pds:
            for gn, rep2curve in paradigm.group_name2rep2curve.items():
                for rep, curve in rep2curve.items():
                    # this curve is performance collapsed across template
                    # and for a unique rep and paradigm
                    gn2rep2curves_by_pd[gn].setdefault(rep, []).append(curve)

        # values shared by every group: computed once instead of per group
        x = np.arange(0, self.last_step + self.step_size, self.step_size)
        roberta_base_avg = np.mean(
            list(self.paradigm2roberta_base_accuracy.values()))
        baseline_avg = np.mean(
            list(self.paradigm2baseline_accuracy.values()))
        y_roberta_base = np.repeat(roberta_base_avg, len(x))
        y_baseline = np.repeat(baseline_avg, len(x))

        # plot
        for gn, rep2curves_by_pd in gn2rep2curves_by_pd.items():
            # average across paradigms -> one curve for each rep
            curves = np.array([
                np.array(curves_by_pd).mean(axis=0)
                for curves_by_pd in rep2curves_by_pd.values()
            ])

            # colour follows the group's position in the first paradigm
            color = f'C{self.pds[0].group_names.index(gn)}'

            # plot averages for BabyBERTa
            y = curves.mean(axis=0)
            ax.plot(x, y, linewidth=self.line_width, color=color)

            # NOTE(review): both reference lines are redrawn for every group,
            # as in the original code - confirm this is intended.
            # plot average for RoBERTa-base
            ax.plot(x,
                    y_roberta_base,
                    linewidth=self.line_width,
                    **self.ax_kwargs_roberta_base)

            # plot average for frequency baseline
            ax.plot(x,
                    y_baseline,
                    linewidth=self.line_width,
                    **self.ax_kwargs_baseline)

            # plot the margin of error (shaded region); `sem` and `t` come
            # from outside this block - presumably scipy.stats, TODO confirm
            n = len(curves)
            h = sem(curves, axis=0) * t.ppf(
                (1 + self.confidence) / 2, n - 1)  # margin of error
            ax.fill_between(x, y + h, y - h, alpha=0.2, color=color)

            # printout
            if use_title:  # to prevent printing summary twice
                print(f'{gn} avg acc at step {self.last_step} = {y[-1]:.3f}')

        if use_title:
            print(
                f'roberta-base Liu2019 avg acc at step {self.last_step} = {roberta_base_avg:.3f}'
            )