Exemple #1
0
    def decision_boundary(self, x, y, ax: plt.axis = None):
        """
        Draw the regions predicted by ``self.predict`` together with the labeled samples
        :param x: data; columns 0 and 1 are used as the horizontal and vertical coordinates
        :param y: true labels, used to color the scattered samples
        :param ax: matplotlib axes to draw on; when omitted a new figure
                   with figsize (10, 10) is created
        :return: None
        """
        if ax is None:
            _, ax = plt.subplots(figsize=(10, 10))

        step = 0.02  # spacing between neighbouring grid nodes
        first, second = x[:, 0], x[:, 1]

        # the sampled area extends one unit beyond the data on every side
        grid_x, grid_y = np.meshgrid(
            np.arange(first.min() - 1, first.max() + 1, step),
            np.arange(second.min() - 1, second.max() + 1, step),
        )

        # predict on every grid node, then fold the result back to the grid layout
        grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
        regions = self.predict(grid_points).reshape(grid_x.shape)

        palette = plt.cm.coolwarm
        ax.contourf(grid_x, grid_y, regions, cmap=palette, alpha=0.8)
        ax.scatter(first, second, c=y, cmap=palette)

        # clip the view to the sampled grid and hide the ticks on both axes
        ax.set_xlim(grid_x.min(), grid_x.max())
        ax.set_ylim(grid_y.min(), grid_y.max())
        ax.set_xticks(())
        ax.set_yticks(())

        plt.show()
Exemple #2
0
    def decision_boundary(self, x, y, name: str = '', ax: plt.axis = None):
        """
        Plot decision boundary and labeled data, then save or show the figure
        :param x: data; columns 0 and 1 are used as the plot coordinates
        :param y: true labels, used to color the scattered samples
        :param name: when non-empty, the current figure is saved as
                     '<name>.png' in the current working directory instead
                     of being shown
        :param ax: matplotlib axes (optional); a new figure with
                   figsize (10, 10) is created when omitted
        :return: None
        """
        if ax is None:
            _, ax = plt.subplots(figsize=(10, 10))

        h = 0.02  # grid step

        # sample an area reaching one unit beyond the data on every side
        x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
        y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1

        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # build the (n_points, 2) matrix of grid nodes once and predict on it
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        z = self.predict(grid_points).reshape(xx.shape)

        ax.contourf(xx, yy, z, cmap=plt.cm.coolwarm, alpha=0.8)
        ax.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.coolwarm)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())

        if name:
            plt.savefig(os.path.join(os.getcwd(), name + '.png'))
        else:
            plt.show()
        # clear the current figure in both cases so later plots start clean
        plt.clf()
Exemple #3
0
    def _plot_summary_on_axis(
        self,
        ax: plt.axis,
        label_y_axis: bool,
        use_title: bool,
    ):
        """used to plot summary on multi-axis figure, or in standalone figure

        For every group, the curves of each replication are averaged across
        all paradigms in ``self.pds``; the mean over replications is drawn as
        a line with a shaded margin of error around it. Horizontal reference
        lines for the RoBERTa-base and baseline averages are drawn as well.

        :param ax: matplotlib axes to draw on
        :param label_y_axis: when True the y-axis gets its label and
            ``self.y_ticks``; otherwise label and ticks are left empty
        :param use_title: when True the axes is titled 'Average' and the
            final-step averages are printed; when False the word 'Average'
            is prepended to the y-axis label instead and nothing is printed
        :return: None
        """

        # axis
        if use_title:
            ax.set_title('Average', fontsize=configs.Figs.title_font_size)
            y_axis_label = self.y_axis_label
        else:
            y_axis_label = f'Average {self.y_axis_label}'
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.set_ylim(self.y_lims)

        # x-axis
        ax.set_xticks([self.last_step])
        ax.set_xticklabels([shorten_tick_label(self.last_step)],
                           fontsize=configs.Figs.tick_font_size)
        ax.set_xlabel(self.x_axis_label, fontsize=configs.Figs.ax_font_size)

        # y axis
        if label_y_axis:
            ax.set_ylabel(y_axis_label, fontsize=configs.Figs.ax_font_size)
            ax.set_yticks(self.y_ticks)
            ax.set_yticklabels(self.y_ticks,
                               fontsize=configs.Figs.tick_font_size)
        else:
            ax.set_ylabel('', fontsize=configs.Figs.ax_font_size)
            ax.set_yticks([])
            ax.set_yticklabels([], fontsize=configs.Figs.tick_font_size)

        # collect curves for each replication across all paradigms
        gn2rep2curves_by_pd = defaultdict(dict)
        for pd in self.pds:
            for gn, rep2curve in pd.group_name2rep2curve.items():
                for rep, curve in rep2curve.items():
                    # this curve is performance collapsed across template and for a unique rep and paradigm
                    gn2rep2curves_by_pd[gn].setdefault(rep, []).append(curve)

        # values shared by every group: computed once instead of per iteration
        x = np.arange(0, self.last_step + self.step_size, self.step_size)
        roberta_base_avg = np.mean(
            list(self.paradigm2roberta_base_accuracy.values()))
        baseline_avg = np.mean(
            list(self.paradigm2baseline_accuracy.values()))
        # constant (horizontal) reference curves spanning all of x
        y_roberta_base = np.repeat(roberta_base_avg, len(x))
        y_baseline = np.repeat(baseline_avg, len(x))

        # plot
        for gn, rep2curves_by_pd in gn2rep2curves_by_pd.items():
            # average across paradigms; one row for each rep
            curves = np.array([
                np.array(curves_by_pd).mean(axis=0)
                for curves_by_pd in rep2curves_by_pd.values()
            ])

            # color index follows the group's position in the first paradigm
            color = f'C{self.pds[0].group_names.index(gn)}'

            # plot averages for BabyBERTa
            y = curves.mean(axis=0)
            ax.plot(x, y, linewidth=self.line_width, color=color)

            # NOTE(review): the two reference lines below are re-drawn for
            # every group; kept as-is so the set of drawn artists is unchanged.

            # plot average for RoBERTa-base
            ax.plot(x,
                    y_roberta_base,
                    linewidth=self.line_width,
                    **self.ax_kwargs_roberta_base)

            # plot average for frequency baseline
            ax.plot(x,
                    y_baseline,
                    linewidth=self.line_width,
                    **self.ax_kwargs_baseline)

            # plot the margin of error (shaded region)
            n = len(curves)
            h = sem(curves, axis=0) * t.ppf(
                (1 + self.confidence) / 2, n - 1)  # margin of error
            ax.fill_between(x, y + h, y - h, alpha=0.2, color=color)

            # printout
            if use_title:  # to prevent printing summary twice
                print(f'{gn} avg acc at step {self.last_step} = {y[-1]:.3f}')

        if use_title:
            print(
                f'roberta-base Liu2019 avg acc at step {self.last_step} = {roberta_base_avg:.3f}'
            )
Exemple #4
0
def plot_cluster_metric_scores(
    metric_scores: list,
    hyperparameters: list,
    best_score_idx: int,
    metric_name: str,
    scatter: bool = True,
    set_xticks: bool = True,
    set_xtickslabels: bool = True,
    xtickslabels_rotation: int = 90,
    ax: plt.axis = None,
    xlabel: str = "Hyperparameters",
    xrange: range = None,
    show_plot: bool = True,
) -> None:
    """
    Draws internal cluster validation metric scores as a line plot and
    highlights the best score with a larger red marker.

    Parameters
    ----------
    metric_scores : list
        Scores computed using the metric, one per hyperparameter setting
    hyperparameters : list
        Hyperparameters used to compute the scores; also used as the
        x-axis tick labels
    best_score_idx : int
        Index of the best score
    metric_name : str
        Name of the internal cluster validation metric; the y-axis is
        labelled "<metric_name> score"
    scatter : bool
        Whether or not to scatter a point at every score (defaults to True)
    set_xticks : bool
        Whether or not to place x-axis ticks at the x positions
        (defaults to True)
    set_xtickslabels : bool
        Whether or not to label the x-axis ticks with the hyperparameters
        (defaults to True)
    xtickslabels_rotation : int
        Rotation of the x-axis tick labels (defaults to 90); only has an
        effect when set_xtickslabels is True
    ax : plt.axis
        Matplotlib axis to draw on (defaults to None, in which case a new
        figure is created)
    xlabel : str
        X-axis label (defaults to "Hyperparameters")
    xrange : range
        X positions of the scores (defaults to
        range(len(hyperparameters)), i.e. 0 to len(hyperparameters) - 1)
    show_plot : bool
        Whether or not to call plt.tight_layout() and plt.show()
        (defaults to True)
    """
    axis = plt.subplots()[1] if ax is None else ax
    x_positions = range(len(hyperparameters)) if xrange is None else xrange

    # Score curve, optionally with a marker on every point
    axis.plot(x_positions, metric_scores)
    if scatter:
        axis.scatter(x_positions, metric_scores)

    # Best score: drawn above the other artists via a high zorder
    best_x = x_positions[best_score_idx]
    best_y = metric_scores[best_score_idx]
    axis.scatter(best_x, best_y, c="r", s=72, zorder=10)

    # X-axis ticks and their labels
    if set_xticks:
        axis.set_xticks(x_positions)
    if set_xtickslabels:
        axis.set_xticklabels(
            hyperparameters,
            rotation=xtickslabels_rotation,
            ha="center",
        )

    axis.set_xlabel(xlabel)
    axis.set_ylabel(f"{metric_name} score")

    if not show_plot:
        return
    plt.tight_layout()
    plt.show()