def plot_wikipedia_pmfs():
    """
    Plot geometric distributions for p = 0.2, 0.5 and 0.8 on shared axes.

    Styled after the figure at
    https://en.wikipedia.org/wiki/Geometric_distribution#/media/
    File:Geometric_pmf.svg

    The values of `k` to plot over are read from the enclosing scope.
    """
    axf = AxesFormatter(width=12, height=9)
    # (p, marker face colour) for each curve, in drawing order
    curves = (
        (0.2, 'orange'),
        (0.5, 'purple'),
        (0.8, 'lightblue'),
    )
    for p, face_color in curves:
        Geometric(p=p).plot(
            k=k, kind='line', color='gray', ax=axf.axes,
            marker='o', mfc=face_color
        )
    # each formatter call is applied to the result of the previous one
    formatted = axf.set_text(
        x_label='x', y_label='P(X=x)', title='Probability mass function'
    )
    formatted = formatted.set_x_lim(0, 10)
    formatted.set_y_lim(0, 1)
    axf.axes.legend(loc='upper right')
    plt.show()
def __init__(self, axes: ndarray):
    """
    Create a new AxesFormatterArray.

    Every element of `axes` is wrapped in an AxesFormatter and stored at the
    same index of an array with the same shape. Only 1- and 2-dimensional
    input is populated; for any other dimensionality the internal array is
    allocated but its elements are not assigned.

    :param axes: Array of Axes instances.
    """
    wrapped = empty_like(axes, dtype=AxesFormatter)
    self._axes = wrapped
    n_dims = axes.ndim
    if n_dims == 1:
        for i, item in enumerate(axes):
            wrapped[i] = AxesFormatter(item)
    elif n_dims == 2:
        for i, row in enumerate(axes):
            for j, item in enumerate(row):
                wrapped[i, j] = AxesFormatter(item)
def x_axes(self) -> Union[AxisFormatter, AxisFormatterArray]:
    """
    Return an AxisFormatter or AxisFormatterArray for the X-Axis or X-Axes
    of the wrapped Axes.

    When a single Axes is wrapped, the `x_axis` of an AxesFormatter built
    around it is returned. When an array is wrapped, an AxisFormatter is
    built for the `xaxis` of every element of a 1- or 2-dimensional array
    and the results are returned in an AxisFormatterArray of the same shape.
    """
    if self._has_array:
        source = self._axes
        formatters = empty_like(source, dtype=AxisFormatter)
        n_dims = formatters.ndim
        if n_dims == 1:
            for i, item in enumerate(source):
                formatters[i] = AxisFormatter(
                    axis=item.xaxis, direction='x', axes=item
                )
        elif n_dims == 2:
            for i, row in enumerate(source):
                for j, item in enumerate(row):
                    formatters[i, j] = AxisFormatter(
                        axis=item.xaxis, direction='x', axes=item
                    )
        return AxisFormatterArray(formatters)
    return AxesFormatter(self._axes).x_axis
def single(self) -> AxesFormatter:
    """
    Return an AxesFormatter for the wrapped Axes.

    :raises TypeError: If an array of Axes is wrapped rather than a single
                       Axes instance.
    """
    if self._has_array:
        raise TypeError('FigureFormatter holds an array of Axes.')
    return AxesFormatter(self._axes)
def plot_distribution(self, data: Optional[Series] = None,
                      transpose: bool = False,
                      bins: Optional[Bins] = None,
                      color: str = 'C0', pct_size: Optional[int] = None,
                      grid: bool = False,
                      title: Optional[str] = None,
                      x_label: Optional[str] = None,
                      y_label: Optional[str] = None,
                      ax: Optional[Axes] = None) -> Axes:
    """
    Plot a histogram of the distribution of the response data.

    :param data: The answers given by Respondents to the Question.
                 Falls back to the data stored on the instance if None.
    :param transpose: True to plot horizontally.
    :param bins: Value for hist bins. Leave as None for integer bins.
    :param color: Color or list of colors for the bars.
    :param pct_size: Font size for the percent markers.
    :param grid: Whether to show a plot grid or not.
    :param title: Optional title for the plot.
    :param x_label: Label for the x-axis.
    :param y_label: Label for the y-axis.
    :param ax: Optional matplotlib axes to plot on.
    :return: The Axes that were plotted on.
    :raises ValueError: If no data was passed and none is stored on the
                        instance.
    """
    if data is None:
        data = self._data
    if data is None:
        raise ValueError('No data!')
    # create axes only once the inputs are known to be usable, so that the
    # error path does not leave an empty figure behind
    if ax is None:
        ax = new_axes()
    orientation = 'horizontal' if transpose else 'vertical'
    # test against None explicitly: an array of bin edges has no unambiguous
    # truth value, so `bins or default` would raise for it
    if bins is None:
        bins = self._default_hist(data)
    data.plot(kind='hist', ax=ax, bins=bins,
              orientation=orientation, color=color)
    # add percentages, positioned at the mid-point of each bin
    hist, edges = histogram(data, bins=bins)
    bin_centers = 0.5 * (edges[1:] + edges[:-1])
    item_counts = Series(index=bin_centers, data=hist)
    item_pcts = 100 * item_counts / item_counts.sum()
    label_bar_plot_pcts(item_counts=item_counts, item_pcts=item_pcts,
                        ax=ax, transpose=transpose, font_size=pct_size)
    # add titles and grid; the question text is set as the title first and
    # `title` is then handed to the formatter
    ax.set_title(self.text)
    AxesFormatter(ax).set_text(
        title=title, x_label=x_label, y_label=y_label
    ).set_axis_below(True).grid(grid)
    # label the count axis when the caller did not provide a label for it
    if transpose and not x_label:
        ax.set_xlabel('# Respondents')
    elif not transpose and not y_label:
        ax.set_ylabel('# Respondents')
    return ax
def axes(self) -> Union[AxesFormatter, AxesFormatterArray]:
    """
    Return an AxesFormatter or AxesFormatterArray for the wrapped Axes or
    array of Axes.

    The array wrapper is used when the instance holds an array of Axes,
    otherwise the single-Axes wrapper is used.
    """
    wrapper = AxesFormatterArray if self._has_array else AxesFormatter
    return wrapper(self._axes)
def plot_wikipedia_pmfs():
    """
    Plot three hypergeometric distributions with N=500 on shared axes.

    Styled after the figure at
    https://en.wikipedia.org/wiki/Hypergeometric_distribution#/media/
    File:HypergeometricPDF.png

    The values of `k` to plot over are read from the enclosing scope.
    """
    axf = AxesFormatter(width=12, height=9)
    # (K, n, colour) for each curve, in drawing order
    curves = (
        (50, 100, 'blue'),
        (60, 200, 'green'),
        (70, 300, 'red'),
    )
    for cap_k, small_n, shade in curves:
        distribution = HyperGeometric(N=500, K=cap_k, n=small_n)
        distribution.plot(
            k=k, kind='line', color=shade, ax=axf.axes,
            marker='o', mfc=shade
        )
    # each formatter call is applied to the result of the previous one
    formatted = axf.set_text(
        x_label='k', y_label='P(X=k)', title='Probability mass function'
    )
    formatted = formatted.set_x_lim(0, 60)
    formatted.set_y_lim(0, 0.15)
    axf.axes.legend(loc='upper right')
    plt.show()
def plot_wikipedia_cdfs():
    """
    Plot the CDFs of three hypergeometric distributions with N=500 on shared
    axes.

    Styled after the figure at
    https://en.wikipedia.org/wiki/Hypergeometric_distribution#/media/
    File:HypergeometricCDF.png

    The values of `k` to plot over are read from the enclosing scope.
    """
    axf = AxesFormatter(width=12, height=9)
    # (K, n, colour) for each curve, in drawing order
    curves = (
        (50, 100, 'blue'),
        (60, 200, 'green'),
        (70, 300, 'red'),
    )
    for cap_k, small_n, shade in curves:
        cdf = HyperGeometric(N=500, K=cap_k, n=small_n).cdf()
        cdf.plot(
            k=k, kind='line', color=shade, ax=axf.axes,
            marker='o', mfc=shade
        )
    # each formatter call is applied to the result of the previous one
    formatted = axf.set_text(
        x_label='x', y_label='P(X≤x)',
        title='Cumulative distribution function'
    )
    formatted = formatted.set_x_lim(0, 60)
    formatted.set_y_lim(0, 1)
    axf.axes.legend(loc='lower right')
    plt.show()
def plot_scipy_pmf():
    """
    Plot the PMF of BetaBinomial(n=5, alpha=2.3, beta=0.63) for k = 0..4 as
    unconnected markers with vertical lines.

    Parameters follow the example at
    https://docs.scipy.org/doc/scipy/reference/generated/
    scipy.stats.betabinom.html#scipy.stats.betabinom
    """
    distribution = BetaBinomial(n=5, alpha=2.3, beta=0.63)
    formatter = AxesFormatter()
    pmf = distribution.pmf()
    # markers only (empty line style)
    marker_style = dict(kind='line', ls='', marker='o', color='blue')
    pmf.plot(k=range(0, 5), vlines=True, ax=formatter.axes, **marker_style)
    formatter.set_x_lim(-0.5, 4.5)
    formatter.show()
def plot_comparison(self, ax: Axes = None, **kwargs) -> Axes:
    """
    Plot a comparison between the different question ratings.

    Builds a table with one column of value counts per item in
    `self.item_dict` and draws it as an annotated heatmap.

    :param ax: Optional matplotlib axes.
    :param kwargs: Extra keyword arguments passed to the heatmap call.
                   `cmap` defaults to 'Blues' when not given.
    :return: The Axes that were plotted on.
    """
    if not ax:
        ax = new_axes()
    kwargs.setdefault('cmap', 'Blues')
    counts_by_item = {}
    for key, question in self.item_dict.items():
        counts_by_item[key] = question.value_counts()
    data = DataFrame(counts_by_item)
    heatmap(data=data, ax=ax, annot=True, fmt='d', **kwargs)
    labelled = AxesFormatter(ax).set_text(
        x_label='Question', y_label='Rating'
    )
    labelled.invert_y_axis()
    draw_vertical_dividers(ax)
    return ax
def plot_wikipedia_cdfs():
    """
    Plot the CDFs of several beta-binomial distributions with n=10 on shared
    axes, one per (alpha, beta, colour) triple read from the enclosing scope.

    Styled after the figure at
    https://en.wikipedia.org/wiki/File:Beta-binomial_cdf.png
    """
    axf = AxesFormatter(width=12, height=9)
    parameter_sets = zip(alpha_wiki, beta_wiki, colors_wiki)
    for alpha, beta, color in parameter_sets:
        cdf = BetaBinomial(n=10, alpha=alpha, beta=beta).cdf()
        cdf.plot(
            k=k_wiki, color=color, kind='line', ax=axf.axes, marker='o'
        )
    axf.add_legend()
    axf.show()
def plot_distribution(self, data: Optional[Series] = None,
                      drop_na: bool = True,
                      transpose: bool = True,
                      color: str = 'C0', pct_size: int = None,
                      significance: bool = False,
                      sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                      label_mappings: Optional[Dict[str, str]] = None,
                      grid: bool = False,
                      max_axis_label_chars: Optional[int] = None,
                      title: Optional[str] = None,
                      x_label: Optional[str] = None,
                      y_label: Optional[str] = None,
                      ax: Optional[Axes] = None,
                      **kwargs) -> Axes:
    """
    Plot the distribution of answers to the Question as a bar chart.

    :param data: The answers given by Respondents to the Question.
                 Falls back to the data stored on the instance if None.
    :param drop_na: Whether to drop null responses from the dataset
                    (affects % calculations).
    :param transpose: Whether to transpose the labels to the y-axis.
    :param color: Color or list of colors for the bars.
    :param pct_size: Font size for the percent markers.
    :param significance: Whether to highlight significant categories.
    :param sig_colors: Tuple of (high, low) colors for highlighting
                       significance.
    :param label_mappings: Optional dict of replacements for labels.
    :param grid: Whether to show a plot grid or not.
    :param max_axis_label_chars: Maximum number of characters in axis labels
                                 before wrapping.
    :param title: Axes title. Defaults to the question text.
    :param x_label: Label for the x-axis.
    :param y_label: Label for the y-axis.
    :param ax: Optional matplotlib axes to plot on.
    :param kwargs: Extra keyword arguments passed to the bar plot call.
    :return: The Axes that were plotted on. When `data` is empty, `ax` is
             returned exactly as it was passed in.
    :raises ValueError: If no data was passed and none is stored on the
                        instance.
    """
    if data is None:
        data = self._data
    if data is None:
        raise ValueError('No data!')
    if len(data) == 0:
        return ax

    # fill in default texts; the count axis depends on the orientation
    if title is None:
        title = self.text
    if x_label is None:
        x_label = '# Respondents' if transpose else self.name
    if y_label is None:
        y_label = self.name if transpose else '# Respondents'

    features = self.make_features(answers=data, drop_na=drop_na,
                                  naming='{{choice}}')
    item_counts: Series = features.sum()
    plot_type = 'barh' if transpose else 'bar'
    ax = ax or new_axes()

    # optionally map, then wrap, the category labels
    labels = item_counts.index
    if label_mappings is not None:
        labels = map_text(labels, mapping=label_mappings or {})
    item_counts.index = wrap_text(labels, max_width=max_axis_label_chars)

    edge_color = None
    line_width = None
    if significance:
        one_vs_any = self.significance_one_vs_any()
        edge_color = []
        for category in self.category_names:
            score = one_vs_any[category]
            if score >= 0.945:
                edge_color.append(sig_colors[0])
            elif score < 0.055:
                edge_color.append(sig_colors[1])
            else:
                edge_color.append(color)
        # only bars whose edge differs from the fill get a visible outline
        line_width = []
        for ec in edge_color:
            line_width.append(2 if ec != color else None)

    item_counts.plot(kind=plot_type, ax=ax, color=color,
                     edgecolor=edge_color, linewidth=line_width, **kwargs)

    # add percentages
    n_rows = len(features)
    item_pcts = 100 * item_counts.div(n_rows)
    label_bar_plot_pcts(item_counts=item_counts, item_pcts=item_pcts,
                        ax=ax, transpose=transpose, font_size=pct_size)

    # add titles and grid
    formatted = AxesFormatter(ax).set_text(
        title=title, x_label=x_label, y_label=y_label
    )
    formatted.set_axis_below(True).grid(grid)
    if transpose:
        if not x_label:
            ax.set_xlabel('# Respondents')
    elif not y_label:
        ax.set_ylabel('# Respondents')
    return ax
def set_text(
        axes_formatter: AxesFormatter, string: str
) -> AxesFormatter:
    """
    Set the y-label text of the given formatter.

    :param axes_formatter: Formatter to call `set_y_label_text` on.
    :param string: Text passed to `set_y_label_text`.
    :return: Whatever `set_y_label_text` returns.
    """
    result = axes_formatter.set_y_label_text(string)
    return result
def plot_distribution(self, data: Optional[Series] = None,
                      transpose: bool = False,
                      normalize: bool = False,
                      significance: bool = False,
                      sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                      sig_values: Tuple[float, float] = (0.945, 0.055),
                      label_mappings: Optional[Dict[str, str]] = None,
                      ax: Optional[Axes] = None) -> Axes:
    """
    Plot the distribution of answers to the Question as a heatmap of
    choice against rank.

    :param data: The answers given by Respondents to the Question.
                 Falls back to the data stored on the instance if None.
    :param transpose: Whether to transpose the labels to the y-axis.
    :param normalize: Whether to normalize number of responses in each
                      position to total number of responses.
    :param significance: Whether to highlight significant choices.
    :param sig_colors: Tuple of (high, low) colors for highlighting
                       significance.
    :param sig_values: Tuple of (high, low) values for assessing
                       significance.
    :param label_mappings: Optional dict of replacements for labels.
    :param ax: Optional matplotlib axes to plot on.
    :return: The Axes that were plotted on.
    :raises ValueError: If no data was passed and none is stored on the
                        instance.
    """
    if data is None:
        data = self._data
    if data is None:
        raise ValueError('No data!')

    # one record per (choice, rank) occurrence across all non-null answers
    order_counts = []
    # Series.items() is used because Series.iteritems() no longer exists in
    # pandas >= 2.0
    for _, str_user_order in data.items():
        if isnull(str_user_order):
            continue
        user_order = str_user_order.split(CATEGORY_SPLITTER)
        for rank, choice in enumerate(user_order, start=1):
            order_counts.append({'choice': choice, 'rank': rank})
    counts = DataFrame(order_counts).groupby(
        ['choice', 'rank']
    ).size().reset_index().rename(columns={0: 'count'})
    pivot = pivot_table(
        data=counts, index='choice', columns='rank', values='count'
    ).reindex(self.categories)
    pivot.index = wrap_text(
        map_text(pivot.index, mapping=label_mappings or {})
    )
    if normalize:
        fmt = '.2f'
        # the denominator is the number of answers passed in, nulls included
        pivot = pivot / len(data)
    else:
        fmt = '.0f'
    if transpose:
        pivot = pivot.T

    axf = AxesFormatter(axes=ax)
    ax = axf.axes
    heatmap(data=pivot, annot=True, fmt=fmt, cmap='Blues', ax=ax)

    if significance:
        # NOTE(review): the method name contains a double underscore;
        # confirm it matches the definition on this class
        cat_sigs = self.significance__one_vs_any()
        n_categories = len(self.categories)
        for category, sig_value in cat_sigs.items():
            if sig_value <= sig_values[1]:
                color = sig_colors[1]
            elif sig_value >= sig_values[0]:
                color = sig_colors[0]
            else:
                # between the thresholds, or not comparable (e.g. NaN):
                # nothing to highlight
                continue
            position = self.categories.index(category)
            # the outline spans the full heatmap along one direction and a
            # single cell-width band along the other
            full_span = (0.1, n_categories - 0.1)
            band = (position + 0.1, position + 0.9)
            if not transpose:
                (x_min, x_max), (y_min, y_max) = full_span, band
            else:
                (x_min, x_max), (y_min, y_max) = band, full_span
            ax.plot([x_min, x_max, x_max, x_min, x_min],
                    [y_min, y_min, y_max, y_max, y_min],
                    color=color, linewidth=2)

    axf.x_axis.tick_labels.set_ha_center()
    axf.y_axis.tick_labels.set_va_center()
    if transpose:
        draw_vertical_dividers(ax)
    else:
        draw_horizontal_dividers(ax)
    axf.set_title_text(self.text)
    return ax