Ejemplo n.º 1
0
def plot_wikipedia_pmfs():
    """
    Reproduce the geometric-distribution PMF figure from Wikipedia.

    https://en.wikipedia.org/wiki/Geometric_distribution#/media/
    File:Geometric_pmf.svg

    One curve is drawn for each of p = 0.2, 0.5 and 0.8 on a single
    AxesFormatter created with width=12 and height=9, then the figure is
    shown. The values of ``k`` come from a name defined outside this function.
    """
    axf = AxesFormatter(width=12, height=9)
    # (p, marker face color) for each curve, in drawing order
    curves = (
        (0.2, 'orange'),
        (0.5, 'purple'),
        (0.8, 'lightblue'),
    )
    for prob, face_color in curves:
        Geometric(p=prob).plot(
            k=k, kind='line', color='gray',
            ax=axf.axes, marker='o', mfc=face_color
        )
    labelled = axf.set_text(
        x_label='x', y_label='P(X=x)', title='Probability mass function'
    )
    labelled.set_x_lim(0, 10).set_y_lim(0, 1)
    axf.axes.legend(loc='upper right')
    plt.show()
Ejemplo n.º 2
0
    def __init__(self, axes: ndarray):
        """
        Wrap every element of an array of Axes in an AxesFormatter.

        Only 1- and 2-dimensional arrays are filled in; for any other number
        of dimensions the internal array is allocated with the same shape but
        its elements are not assigned.

        :param axes: Array of Axes instances.
        """
        self._axes = empty_like(axes, dtype=AxesFormatter)
        n_dims = axes.ndim
        if n_dims == 1:
            for pos, ax in enumerate(axes):
                self._axes[pos] = AxesFormatter(ax)
        elif n_dims == 2:
            for row, row_axes in enumerate(axes):
                for col, ax in enumerate(row_axes):
                    self._axes[row, col] = AxesFormatter(ax)
Ejemplo n.º 3
0
    def x_axes(self) -> Union[AxisFormatter, AxisFormatterArray]:
        """
        Build formatter(s) for the x-axis of the wrapped Axes.

        :return: The ``x_axis`` of an AxesFormatter when a single Axes is
                 wrapped; otherwise an AxisFormatterArray with one
                 AxisFormatter per element of the wrapped array (elements are
                 only filled in for 1- and 2-dimensional arrays).
        """
        # Single Axes: delegate straight to AxesFormatter.
        if not self._has_array:
            return AxesFormatter(self._axes).x_axis

        wrapped = self._axes
        formatters = empty_like(wrapped, dtype=AxisFormatter)
        if formatters.ndim == 1:
            for pos, ax in enumerate(wrapped):
                formatters[pos] = AxisFormatter(
                    axis=ax.xaxis, direction='x', axes=ax
                )
        elif formatters.ndim == 2:
            for row, row_axes in enumerate(wrapped):
                for col, ax in enumerate(row_axes):
                    formatters[row, col] = AxisFormatter(
                        axis=ax.xaxis, direction='x', axes=ax
                    )
        return AxisFormatterArray(formatters)
Ejemplo n.º 4
0
 def single(self) -> AxesFormatter:
     """
     Wrap the single underlying Axes in an AxesFormatter.

     :return: AxesFormatter built from the wrapped Axes.
     :raises TypeError: If this object wraps an array of Axes rather than a
                        single Axes.
     """
     if self._has_array:
         raise TypeError('FigureFormatter holds an array of Axes.')
     return AxesFormatter(self._axes)
Ejemplo n.º 5
0
    def plot_distribution(self,
                          data: Optional[Series] = None,
                          transpose: bool = False,
                          bins: Optional[Bins] = None,
                          color: str = 'C0',
                          pct_size: Optional[int] = None,
                          grid: bool = False,
                          title: Optional[str] = None,
                          x_label: Optional[str] = None,
                          y_label: Optional[str] = None,
                          ax: Optional[Axes] = None) -> Axes:
        """
        Plot a histogram of the distribution of the response data.

        :param data: The answers given by Respondents to the Question.
                     Falls back to the data stored on the instance.
        :param transpose: True to plot horizontally.
        :param bins: Value for hist bins. Leave as None to use the bins
                     produced by ``self._default_hist(data)``.
        :param color: Color or list of colors for the bars.
        :param pct_size: Font size for the percent markers.
        :param grid: Whether to show a plot grid or not.
        :param title: Optional title for the plot.
        :param x_label: Label for the x-axis.
        :param y_label: Label for the y-axis.
        :param ax: Optional matplotlib axes to plot on. New axes are created
                   with ``new_axes()`` when omitted.
        :return: The Axes that were plotted on.
        :raises ValueError: If no data is passed and none is stored on the
                            instance.
        """
        # Validate the data first so that a failing call does not create new
        # axes before raising.
        data = data if data is not None else self._data
        if data is None:
            raise ValueError('No data!')
        if ax is None:
            ax = new_axes()
        # Test against None explicitly: truth-testing an array of bin edges
        # (e.g. a multi-element numpy array) raises ValueError.
        if bins is None:
            bins = self._default_hist(data)
        orientation = 'horizontal' if transpose else 'vertical'
        data.plot(kind='hist',
                  ax=ax,
                  bins=bins,
                  orientation=orientation,
                  color=color)

        # add percentages, indexed by the mid-point of each bin
        hist, edges = histogram(data, bins=bins)
        item_counts = Series(index=0.5 * (edges[1:] + edges[:-1]), data=hist)
        item_pcts = 100 * item_counts / item_counts.sum()
        label_bar_plot_pcts(item_counts=item_counts,
                            item_pcts=item_pcts,
                            ax=ax,
                            transpose=transpose,
                            font_size=pct_size)

        # add titles and grid; the question text is set as the title before
        # the caller-supplied title is handed to set_text
        ax.set_title(self.text)
        AxesFormatter(ax).set_text(
            title=title, x_label=x_label,
            y_label=y_label).set_axis_below(True).grid(grid)
        # label the count axis when the caller did not supply a label for it
        if transpose and not x_label:
            ax.set_xlabel('# Respondents')
        elif not transpose and not y_label:
            ax.set_ylabel('# Respondents')

        return ax
Ejemplo n.º 6
0
 def axes(self) -> Union[AxesFormatter, AxesFormatterArray]:
     """
     Wrap whatever this object holds in the matching formatter type.

     :return: An AxesFormatterArray when an array of Axes is held, otherwise
              an AxesFormatter for the single Axes.
     """
     wrapper = AxesFormatterArray if self._has_array else AxesFormatter
     return wrapper(self._axes)
Ejemplo n.º 7
0
def plot_wikipedia_pmfs():
    """
    Reproduce the hypergeometric PMF figure from Wikipedia.

    https://en.wikipedia.org/wiki/Hypergeometric_distribution#/media/
    File:HypergeometricPDF.png

    Three HyperGeometric distributions with N=500 are plotted on one
    AxesFormatter and the figure is shown. The values of ``k`` come from a
    name defined outside this function.
    """
    axf = AxesFormatter(width=12, height=9)
    # (K, n, color) for each curve, in drawing order
    curves = (
        (50, 100, 'blue'),
        (60, 200, 'green'),
        (70, 300, 'red'),
    )
    for big_k, small_n, shade in curves:
        distribution = HyperGeometric(N=500, K=big_k, n=small_n)
        distribution.plot(
            k=k, kind='line', color=shade,
            ax=axf.axes, marker='o', mfc=shade
        )
    labelled = axf.set_text(
        x_label='k', y_label='P(X=k)', title='Probability mass function'
    )
    labelled.set_x_lim(0, 60).set_y_lim(0, 0.15)
    axf.axes.legend(loc='upper right')
    plt.show()
Ejemplo n.º 8
0
def plot_wikipedia_cdfs():
    """
    Reproduce the hypergeometric CDF figure from Wikipedia.

    https://en.wikipedia.org/wiki/Hypergeometric_distribution#/media/
    File:HypergeometricCDF.png

    The ``cdf()`` of three HyperGeometric distributions with N=500 is plotted
    on one AxesFormatter and the figure is shown. The values of ``k`` come
    from a name defined outside this function.
    """
    axf = AxesFormatter(width=12, height=9)
    # (K, n, color) for each curve, in drawing order
    curves = (
        (50, 100, 'blue'),
        (60, 200, 'green'),
        (70, 300, 'red'),
    )
    for big_k, small_n, shade in curves:
        cdf = HyperGeometric(N=500, K=big_k, n=small_n).cdf()
        cdf.plot(
            k=k, kind='line', color=shade,
            ax=axf.axes, marker='o', mfc=shade
        )
    labelled = axf.set_text(
        x_label='x', y_label='P(X≤x)', title='Cumulative distribution function'
    )
    labelled.set_x_lim(0, 60).set_y_lim(0, 1)
    axf.axes.legend(loc='lower right')
    plt.show()
Ejemplo n.º 9
0
def plot_scipy_pmf():
    """
    Reproduce the beta-binomial PMF example from the SciPy documentation.

    https://docs.scipy.org/doc/scipy/reference/generated/
    scipy.stats.betabinom.html#scipy.stats.betabinom

    Plots the pmf of BetaBinomial(n=5, alpha=2.3, beta=0.63) for k = 0..4 as
    blue markers with ``vlines=True``, then shows the figure.
    """
    distribution = BetaBinomial(n=5, alpha=2.3, beta=0.63)
    axf = AxesFormatter()
    pmf = distribution.pmf()
    pmf.plot(
        k=range(5), kind='line', ls='', marker='o',
        color='blue', vlines=True, ax=axf.axes
    )
    axf.set_x_lim(-0.5, 4.5)
    axf.show()
Ejemplo n.º 10
0
    def plot_comparison(self, ax: Axes = None, **kwargs) -> Axes:
        """
        Draw a heatmap comparing the value counts of each item's ratings.

        :param ax: Optional matplotlib axes. New axes are created with
                   ``new_axes()`` when omitted.
        :param kwargs: Extra keyword arguments forwarded to ``heatmap``.
                       ``cmap`` defaults to 'Blues' when not supplied.
        :return: The Axes that were plotted on.
        """
        ax = ax or new_axes()
        kwargs.setdefault('cmap', 'Blues')
        # one column of value counts per item in item_dict
        counts_by_item = {}
        for key, question in self.item_dict.items():
            counts_by_item[key] = question.value_counts()
        data = DataFrame(counts_by_item)
        heatmap(data=data, ax=ax, annot=True, fmt='d', **kwargs)
        labelled = AxesFormatter(ax).set_text(x_label='Question',
                                              y_label='Rating')
        labelled.invert_y_axis()
        draw_vertical_dividers(ax)
        return ax
Ejemplo n.º 11
0
def plot_wikipedia_cdfs():
    """
    Reproduce the beta-binomial CDF figure from Wikipedia.

    https://en.wikipedia.org/wiki/File:Beta-binomial_cdf.png

    One BetaBinomial(n=10) cdf is drawn for each (alpha, beta, color) triple
    taken in lockstep from ``alpha_wiki``, ``beta_wiki`` and ``colors_wiki``,
    which, like ``k_wiki``, are defined outside this function.
    """
    axf = AxesFormatter(width=12, height=9)
    curve_settings = zip(alpha_wiki, beta_wiki, colors_wiki)
    for a, b, line_color in curve_settings:
        cdf = BetaBinomial(n=10, alpha=a, beta=b).cdf()
        cdf.plot(
            k=k_wiki, color=line_color, kind='line',
            ax=axf.axes, marker='o'
        )
    axf.add_legend()
    axf.show()
Ejemplo n.º 12
0
    def plot_distribution(self,
                          data: Optional[Series] = None,
                          drop_na: bool = True,
                          transpose: bool = True,
                          color: str = 'C0',
                          pct_size: int = None,
                          significance: bool = False,
                          sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                          label_mappings: Optional[Dict[str, str]] = None,
                          grid: bool = False,
                          max_axis_label_chars: Optional[int] = None,
                          title: Optional[str] = None,
                          x_label: Optional[str] = None,
                          y_label: Optional[str] = None,
                          ax: Optional[Axes] = None,
                          **kwargs) -> Axes:
        """
        Draw a bar chart of the number of responses for each choice.

        :param data: The answers given by Respondents to the Question.
                     Falls back to the data stored on the instance.
        :param drop_na: Whether to drop null responses from the dataset
                        (affects % calculations).
        :param transpose: Whether to transpose the labels to the y-axis
                          (plots horizontal bars when True).
        :param color: Color or list of colors for the bars.
        :param pct_size: Font size for the percent markers.
        :param significance: Whether to highlight significant categories.
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param label_mappings: Optional dict of replacements for labels.
        :param grid: Whether to show a plot grid or not.
        :param max_axis_label_chars: Maximum number of characters in axis labels
                                     before wrapping.
        :param title: Axes title. Defaults to the question text.
        :param x_label: Label for the x-axis.
        :param y_label: Label for the y-axis.
        :param ax: Optional matplotlib axes to plot on.
        :param kwargs: Extra keyword arguments forwarded to the bar plot call.
        :return: The Axes plotted on. When `data` is empty, the `ax` argument
                 is returned as given (which may be None).
        :raises ValueError: If no data is passed and none is stored on the
                            instance.
        """
        respondents_label = '# Respondents'

        if data is None:
            data = self._data
        if data is None:
            raise ValueError('No data!')
        if len(data) == 0:
            return ax

        # fill in default texts; the count axis depends on the orientation
        title = self.text if title is None else title
        if x_label is None:
            x_label = respondents_label if transpose else self.name
        if y_label is None:
            y_label = self.name if transpose else respondents_label

        features = self.make_features(answers=data,
                                      drop_na=drop_na,
                                      naming='{{choice}}')
        item_counts: Series = features.sum()
        plot_kind = 'bar' if not transpose else 'barh'
        ax = ax or new_axes()

        # optionally re-map, then wrap, the category labels
        labels = item_counts.index
        if label_mappings is not None:
            labels = map_text(labels, mapping=label_mappings or {})
        item_counts.index = wrap_text(labels, max_width=max_axis_label_chars)

        edge_color = None
        line_width = None
        if significance:
            one_vs_any = self.significance_one_vs_any()
            edge_color = []
            for category in self.category_names:
                if one_vs_any[category] >= 0.945:
                    edge_color.append(sig_colors[0])
                elif one_vs_any[category] < 0.055:
                    edge_color.append(sig_colors[1])
                else:
                    edge_color.append(color)
            # only highlighted bars get an explicit edge width
            line_width = []
            for ec in edge_color:
                line_width.append(2 if ec != color else None)

        item_counts.plot(kind=plot_kind,
                         ax=ax,
                         color=color,
                         edgecolor=edge_color,
                         linewidth=line_width,
                         **kwargs)

        # add percentages, relative to the number of rows of features
        n_rows = len(features)
        item_pcts = 100 * item_counts.div(n_rows)
        label_bar_plot_pcts(item_counts=item_counts,
                            item_pcts=item_pcts,
                            ax=ax,
                            transpose=transpose,
                            font_size=pct_size)

        # add titles and grid
        labelled = AxesFormatter(ax).set_text(
            title=title, x_label=x_label, y_label=y_label)
        labelled.set_axis_below(True).grid(grid)
        if transpose:
            if not x_label:
                ax.set_xlabel(respondents_label)
        elif not y_label:
            ax.set_ylabel(respondents_label)

        return ax
Ejemplo n.º 13
0
 def set_text(
         axes_formatter: AxesFormatter, string: str
 ) -> AxesFormatter:
     """
     Set the y-axis label text through the given AxesFormatter.

     :param axes_formatter: Formatter whose ``set_y_label_text`` is called.
     :param string: Text passed to ``set_y_label_text``.
     :return: Whatever ``set_y_label_text`` returns.
     """
     result = axes_formatter.set_y_label_text(string)
     return result
Ejemplo n.º 14
0
    def plot_distribution(self,
                          data: Optional[Series] = None,
                          transpose: bool = False,
                          normalize: bool = False,
                          significance: bool = False,
                          sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                          sig_values: Tuple[float, float] = (0.945, 0.055),
                          label_mappings: Optional[Dict[str, str]] = None,
                          ax: Optional[Axes] = None) -> Axes:
        """
        Plot the distribution of answers to the Question as a heatmap of
        choice against rank.

        :param data: The answers given by Respondents to the Question.
                     Falls back to the data stored on the instance.
        :param transpose: Whether to transpose the labels to the y-axis.
        :param normalize: Whether to normalize number of responses in each
                          position to total number of responses.
        :param significance: Whether to highlight significant choices.
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param sig_values: Tuple of (high, low) values for assessing
                           significance.
        :param label_mappings: Optional dict of replacements for labels.
        :param ax: Optional matplotlib axes to plot on.
        :return: The Axes that were plotted on.
        :raises ValueError: If no data is passed and none is stored on the
                            instance.
        """
        data = data if data is not None else self._data
        if data is None:
            raise ValueError('No data!')

        # One record per (choice, rank) pair found in each non-null response.
        # Series.items() is used because Series.iteritems() was removed in
        # pandas 2.0.
        order_counts = []
        for _, str_user_order in data.items():
            if isnull(str_user_order):
                continue
            user_order = str_user_order.split(CATEGORY_SPLITTER)
            order_counts.extend(
                {'choice': choice, 'rank': rank}
                for rank, choice in enumerate(user_order, start=1)
            )
        counts = DataFrame(order_counts).groupby(
            ['choice',
             'rank']).size().reset_index().rename(columns={0: 'count'})
        pivot = pivot_table(data=counts,
                            index='choice',
                            columns='rank',
                            values='count').reindex(self.categories)
        pivot.index = wrap_text(
            map_text(pivot.index, mapping=label_mappings or {}))
        if normalize:
            fmt = '.2f'
            pivot = pivot / len(data)
        else:
            fmt = '.0f'
        # Number of rank columns actually present; taken before any transpose
        # so the highlight boxes span the drawn heatmap rather than assuming
        # there are as many ranks as categories.
        n_ranks = pivot.shape[1]
        if transpose:
            pivot = pivot.T
        axf = AxesFormatter(axes=ax)
        ax = axf.axes
        heatmap(data=pivot, annot=True, fmt=fmt, cmap='Blues', ax=ax)

        if significance:
            high_value, low_value = sig_values[0], sig_values[1]
            cat_sigs = self.significance__one_vs_any()
            for category, sig_value in cat_sigs.items():
                if sig_value <= low_value:
                    color = sig_colors[1]
                elif sig_value >= high_value:
                    color = sig_colors[0]
                else:
                    # Not significant, or NaN (which fails both comparisons):
                    # draw nothing instead of reusing a stale or unbound
                    # color.
                    continue
                # Outline the whole row (or column, when transposed) of the
                # category, inset by 0.1 from the cell edges.
                position = self.categories.index(category)
                if not transpose:
                    x_min = 0.1
                    x_max = n_ranks - 0.1
                    y_min = position + 0.1
                    y_max = position + 0.9
                else:
                    y_min = 0.1
                    y_max = n_ranks - 0.1
                    x_min = position + 0.1
                    x_max = position + 0.9
                ax.plot([x_min, x_max, x_max, x_min, x_min],
                        [y_min, y_min, y_max, y_max, y_min],
                        color=color,
                        linewidth=2)

        axf.x_axis.tick_labels.set_ha_center()
        axf.y_axis.tick_labels.set_va_center()
        if transpose:
            draw_vertical_dividers(ax)
        else:
            draw_horizontal_dividers(ax)
        axf.set_title_text(self.text)
        return ax