예제 #1
0
 def plot_1_1(ax: matplotlib.axes.SubplotBase, df_pred_str: pd.DataFrame, df_pred_str_null: pd.DataFrame, rows_set: np.ndarray) -> None:
     mu, sigma = ClusteringUtils.calc_distribution(df_pred_str.loc[rows_set])
     mu_null, sigma_null = ClusteringUtils.calc_distribution(df_pred_str_null.loc[rows_set])
     y = pd.DataFrame([
         mu - sigma,
         mu,
         mu + sigma,
         mu_null - sigma_null,
         mu_null,
         mu_null + sigma_null
     ]).transpose()
     x = pd.DataFrame([df_pred_str.columns for _ in range(len(y.columns))]).transpose()
     lty = ["--", "-", "--", "--", "-", "--"]
     lwd = [1, 2, 1, 1, 2, 1]
     colors = ["blue", "blue", "blue", "red", "red", "red"]
     legend_lines = [
         Line2D([0], [0], color="blue", linestyle="-", linewidth=2),
         Line2D([0], [0], color="red", linestyle="-", linewidth=2),
         Line2D([0], [0], color="gray", linestyle="--", linewidth=1)
     ]
     for col in x.columns:
         ax.plot(x[col], y[col], color=colors[col], linestyle=lty[col], linewidth=lwd[col])
         ax.set_xlabel("k")
         ax.set_ylabel("prediction strength")
     ax.legend(legend_lines, ["model", "null", "95% CI"], loc='upper right')
예제 #2
0
    def stackplot(
        self,
        eco,
        title: titleType = None,
        logscale: bool = True,
        ax: matplotlib.axes.SubplotBase = None,
    ) -> matplotlib.figure.Figure:

        populations = eco.population_sizes

        if ax is None:
            _, ax = plt.subplots()
        else:
            ax = ax

        figure = ax.get_figure()
        turns = range(len(populations))
        pops = [
            [populations[iturn][ir] for iturn in turns]
            for ir in self.result_set.ranking
        ]
        ax.stackplot(turns, *pops)

        ax.yaxis.tick_left()
        ax.yaxis.set_label_position("right")
        ax.yaxis.labelpad = 25.0

        ax.set_ylim([0.0, 1.0])
        ax.set_ylabel("Relative population size")
        ax.set_xlabel("Turn")
        if title is not None:
            ax.set_title(title)

        trans = transforms.blended_transform_factory(ax.transAxes, ax.transData)
        ticks = []
        for i, n in enumerate(self.result_set.ranked_names):
            x = -0.01
            y = (i + 0.5) * 1 / self.result_set.num_players
            ax.annotate(
                n,
                xy=(x, y),
                xycoords=trans,
                clip_on=False,
                va="center",
                ha="right",
                fontsize=5,
            )
            ticks.append(y)
        ax.set_yticks(ticks)
        ax.tick_params(direction="out")
        ax.set_yticklabels([])

        if logscale:
            ax.set_xscale("log")

        plt.tight_layout()
        return figure
예제 #3
0
파일: plot.py 프로젝트: Nikoleta-v3/Axelrod
    def stackplot(
        self,
        eco,
        title: titleType = None,
        logscale: bool = True,
        ax: matplotlib.axes.SubplotBase = None,
    ) -> matplotlib.figure.Figure:

        populations = eco.population_sizes

        if ax is None:
            _, ax = plt.subplots()
        else:
            ax = ax

        figure = ax.get_figure()
        turns = range(len(populations))
        pops = [
            [populations[iturn][ir] for iturn in turns]
            for ir in self.result_set.ranking
        ]
        ax.stackplot(turns, *pops)

        ax.yaxis.tick_left()
        ax.yaxis.set_label_position("right")
        ax.yaxis.labelpad = 25.0

        ax.set_ylim([0.0, 1.0])
        ax.set_ylabel("Relative population size")
        ax.set_xlabel("Turn")
        if title is not None:
            ax.set_title(title)

        trans = transforms.blended_transform_factory(ax.transAxes, ax.transData)
        ticks = []
        for i, n in enumerate(self.result_set.ranked_names):
            x = -0.01
            y = (i + 0.5) * 1 / self.result_set.num_players
            ax.annotate(
                n,
                xy=(x, y),
                xycoords=trans,
                clip_on=False,
                va="center",
                ha="right",
                fontsize=5,
            )
            ticks.append(y)
        ax.set_yticks(ticks)
        ax.tick_params(direction="out")
        ax.set_yticklabels([])

        if logscale:
            ax.set_xscale("log")

        plt.tight_layout()
        return figure
예제 #4
0
 def plot_1_2(ax: matplotlib.axes.SubplotBase, diff_df: pd.DataFrame, row_id: int) -> None:
     ax.bar(diff_df.columns, diff_df.loc[row_id], color="red")
     ax.set_title("difference")
     ax.set_xlabel("k")
     ax.set_ylabel("diff")
     diff_df_min = diff_df.min().min()
     diff_df_max = diff_df.max().max()
     if CVResultsAggregator.check_limit(diff_df_min) and CVResultsAggregator.check_limit(diff_df_max):
         ax.set_ylim(diff_df_min - 1, diff_df_max + 1)
 def _plot_roc(self, axis: mpl.axes.SubplotBase, y_true: np.ndarray,
               y_pred: np.ndarray, label: str, color: str):
     x, y, _ = roc_curve(y_true, y_pred)
     axis.plot(x,
               y,
               color,
               label="{label}, area={auc:.2f}".format(auc=auc(x, y),
                                                      label=label))
     axis.plot([0, 1], [0, 1], 'k--')
     axis.set_xlabel("False Positive Rate")
     axis.set_ylabel("True Positive Rate")
     axis.set_title("ROC curves - {label}".format(label=label))
예제 #6
0
 def plot_2_2(ax: matplotlib.axes.SubplotBase, df_eff_k: pd.DataFrame, rows_set: np.ndarray) -> None:
     lty = ["--", "-", "--"]
     lwd = [1, 2, 1]
     mu, sigma = ClusteringUtils.calc_distribution(df_eff_k.loc[rows_set])
     y = pd.DataFrame([mu - sigma, mu, mu + sigma]).transpose()
     for col in y.columns:
         ax.plot(df_eff_k.columns, y[col], color="red", linestyle=lty[col], linewidth=lwd[col])
         ax.set_xlabel("k")
         ax.set_ylabel("effective k")
         ax.set_xlim(1, df_eff_k.columns.max())
         ax.set_ylim(1, df_eff_k.columns.max())
     abline_vals = np.array(ax.get_xlim())
     ax.plot(abline_vals, abline_vals, color="grey", linestyle="--")
예제 #7
0
 def plot_2_1_ci(ax: matplotlib.axes.SubplotBase, df_pred_str: pd.DataFrame, df_pred_str_null: pd.DataFrame,
                 rows_set: np.ndarray) -> None:
     mu, sigma = ClusteringUtils.calc_distribution(df_pred_str.loc[rows_set])
     mu_null, sigma_null = ClusteringUtils.calc_distribution(df_pred_str_null.loc[rows_set])
     y = pd.DataFrame([
         mu - sigma > mu_null + sigma_null
     ])
     ax.imshow(y)
     ax.set_xlabel("k")
     ax.set_ylabel("CI intersection")
     ax.set_yticks([])
     ax.xaxis.set_major_locator(ticker.FixedLocator((np.arange(len(y.columns)))))
     ax.xaxis.set_major_formatter(ticker.FixedFormatter(y.columns))
예제 #8
0
def plt_settings_axes(g: matplotlib.axes.SubplotBase,
                      count_df: dask.dataframe.core.DataFrame,
                      grouping_col: list, facet: str, hide_xtitle: bool,
                      log_y: bool) -> None:
    """
    Helper function for plot settings, used in function plt_generic_1d.
    Modifies parameter g for setting titles, axis, formats, etc.
    :param g: matplotlib Axes which will be modified directly in the function.
    :param count_df: pandas dataframe which is plotted.
    :param grouping_col: column for x axis.
    :param facet: parameter passed by function plt_generic_1d, giving information
    on whether we are plotting and average or a count value (on y axis).
    :param hide_xtitle: if set to True, doesn't display title for x axis 
    :param log_y: if set to True, plot in logarithmic scale (for y axis)
    :return: nothing. changes are done directly by modifying parameter g.
    """

    if facet not in ['freq', 'avg']:
        raise ValueError(
            'Parameter facet should be a string of value either "freq" or "avg"'
        )

    # SET X AXIS
    # Labels
    # no particular setup if number of labels is less than the first threshold
    num_xlabels = len(count_df[grouping_col])

    if num_xlabels < LABEL_THRESHOLD_ROTATION:
        g.set_xticklabels(count_df[grouping_col])

    # rotate by 90 degrees if number of labels is between first and second threshold
    elif num_xlabels < LABEL_THRESHOLD_SELECT:
        g.set_xticklabels(count_df[grouping_col], rotation=90)

    # display only certain labels (and rotate by 45 degrees) if number of labels is higher
    else:
        number_of_steps = num_xlabels / 50

        l = np.arange(0, num_xlabels, number_of_steps)

        pos = (l / num_xlabels) * (max(g.get_xticks()) - min(g.get_xticks()))
        g.set_xticks(pos)
        g.set_xticklabels(count_df[grouping_col].iloc[l], rotation=45)

    # Title
    # option to remove the x axis title (when its obvious, e.g. for the years)
    if hide_xtitle:
        g.set_xlabel('')
    else:
        g.set_xlabel(grouping_col)

    # SET Y AXIS
    # log scale option
    if log_y:
        g.set_yscale("log")
        if facet == 'freq':
            g.set_ylabel('# content items (log scale)')
        elif facet == 'avg':
            g.set_ylabel('title length (log scale)')

    else:
        if facet == 'freq':
            g.set_ylabel('# content items')
        elif facet == 'avg':
            g.set_ylabel('title length')

    # Labels
    ylabels = ['{:,.0f}'.format(y) for y in g.get_yticks()]
    g.set_yticklabels(ylabels)

    # Plot Title
    if facet == 'freq':
        g.set_title('Number of content items by %s' % grouping_col)
    elif facet == 'avg':
        g.set_title('Average title length of content items by %s' %
                    grouping_col)
예제 #9
0
 def plot_2_1(ax: matplotlib.axes.SubplotBase, pval_df: pd.DataFrame, row_id: int) -> None:
     ax.bar(pval_df.columns, -np.log10(pval_df.loc[row_id]), color="red")
     ax.set_title("significance of difference")
     ax.set_xlabel("k")
     ax.set_ylabel("-log10(p-value)")
     ax.set_ylim(0, 4)