Example #1
0
def correlation_matrix(data: pd.DataFrame, vmin: int = -1) -> str:
    """Draw a correlation matrix as a colour-mapped image.

    Args:
      data: The correlation matrix to draw. Its column names are used as the
        tick labels on both axes.
      vmin: Lower bound of the colour scale (the upper bound is fixed at 1).
        When it equals 0, the configured colour map is first passed through
        ``get_cmap_half``.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn figure (annotated
      as a string).
    """
    _, axes = plt.subplots()

    color_map = config["plot"]["correlation"]["cmap"].get(str)
    if vmin == 0:
        color_map = get_cmap_half(color_map)

    tick_labels = data.columns
    image = axes.imshow(
        data, vmin=vmin, vmax=1, interpolation="nearest", cmap=color_map
    )
    plt.colorbar(image)

    # Spread one tick per label evenly over each axis.
    # NOTE(review): the x ticks are spaced over the row count and the y ticks
    # over the column count; this only coincides for a square matrix -- confirm
    # that callers always pass one.
    label_count = len(tick_labels)
    n_rows, n_cols = data.shape[0], data.shape[1]
    for set_ticks, extent in ((axes.set_xticks, n_rows), (axes.set_yticks, n_cols)):
        set_ticks(np.arange(0, extent, float(extent) / label_count))

    label_size = get_correlation_font_size(label_count)
    axes.set_xticklabels(tick_labels, rotation=90, fontsize=label_size)
    axes.set_yticklabels(tick_labels, fontsize=label_size)

    # Leave room at the bottom for the rotated x labels.
    plt.subplots_adjust(bottom=0.2)

    return plot_360_n0sc0pe(plt)
Example #2
0
def scatter_series(series, x_label="Width", y_label="Height") -> str:
    """Plot the elements of a series against each other.

    The elements of ``series`` are unzipped into the positional arguments of
    the plotting call, so each element is expected to be a pair of
    coordinates (presumably ``(x, y)`` -- verify against callers).

    Examples:
        >>> scatter_series(file_sizes, "Width", "Height")

    Args:
        series: The data to plot; must support ``len`` and ``tolist``.
        x_label: Label for the x axis.
        y_label: Label for the y axis.

    Returns:
        Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated
        as a string).
    """
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    primary = config["html"]["style"]["primary_color"].get(str)

    if len(series) <= 1000:
        # Few enough points to draw them individually.
        plt.scatter(*zip(*series.tolist()), color=primary)
    else:
        # Above 1000 points, switch to hexagonal binning with a light palette
        # derived from the primary colour.
        palette = sns.light_palette(primary, as_cmap=True)
        plt.hexbin(*zip(*series.tolist()), cmap=palette)

    return plot_360_n0sc0pe(plt)
Example #3
0
def missing_heatmap(data: pd.DataFrame) -> str:
    """Draw the ``missingno`` heatmap for a DataFrame.

    The figure height, font size and subplot margins are adapted to the
    number of columns in ``data``.

    Args:
      data: Pandas DataFrame whose missing values are plotted.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    n_columns = len(data.columns)
    many_columns = n_columns > 40

    # Start at 4, grow by one for every five columns beyond ten, cap at 10.
    extra_height = int((n_columns - 10) / 5) if n_columns > 10 else 0
    fig_height = min(4 + extra_height, 10)

    text_size = get_font_size(data)
    if many_columns:
        text_size /= 1.4

    force_labels = config["plot"]["missing"]["force_labels"].get(bool)
    color_map = config["plot"]["missing"]["cmap"].get(str)
    missingno.heatmap(
        data,
        figsize=(10, fig_height),
        fontsize=text_size,
        cmap=color_map,
        labels=force_labels,
    )

    if many_columns:
        margins = {"left": 0.1, "right": 0.9, "top": 0.9, "bottom": 0.3}
    else:
        margins = {"left": 0.2, "right": 0.9, "top": 0.8, "bottom": 0.3}
    plt.subplots_adjust(**margins)

    return plot_360_n0sc0pe(plt)
Example #4
0
def make_pie_chart(series) -> str:
    """Draw a pie chart of the value frequencies of a series.

    Args:
      series: The data to plot; must provide ``value_counts()``. Each distinct
        value becomes a wedge labelled with that value.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string, matching the other plotting helpers in this file).
    """
    # Count once and derive both labels and wedge sizes from the same result,
    # instead of scanning the series twice.
    counts = series.value_counts()
    wedge_labels = counts.keys().tolist()
    wedge_sizes = counts.values.tolist()

    _, ax = plt.subplots()
    ax.pie(
        wedge_sizes,
        labels=wedge_labels,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
    )
    # Equal aspect ratio ensures that the pie is drawn as a circle.
    ax.axis('equal')

    return plot_360_n0sc0pe(plt)
Example #5
0
def scatter_pairwise(series1, series2, x_label, y_label) -> str:
    """Plot one series against another.

    Args:
      series1: Values for the x axis; must support ``len`` and ``tolist``.
      series2: Values for the y axis; must support ``tolist``.
      x_label: Label for the x axis.
      y_label: Label for the y axis.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    primary = config["html"]["style"]["primary_color"].get(str)

    if len(series1) <= 1000:
        # Few enough points to draw them individually.
        plt.scatter(series1.tolist(), series2.tolist(), color=primary)
    else:
        # Above 1000 points, switch to hexagonal binning with a light palette
        # derived from the primary colour.
        palette = sns.light_palette(primary, as_cmap=True)
        plt.hexbin(
            series1.tolist(), series2.tolist(), gridsize=15, cmap=palette
        )

    return plot_360_n0sc0pe(plt)
Example #6
0
def scatter_complex(series) -> str:
    """Plot the real part of a series against its imaginary part.

    Args:
      series: The data to plot; must support ``len`` and expose ``real`` and
        ``imag`` attributes.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    plt.xlabel("Real")
    plt.ylabel("Imaginary")

    primary = config["html"]["style"]["primary_color"].get(str)

    if len(series) <= 1000:
        # Few enough points to draw them individually.
        plt.scatter(series.real, series.imag, color=primary)
    else:
        # Above 1000 points, switch to hexagonal binning with a light palette
        # derived from the primary colour.
        palette = sns.light_palette(primary, as_cmap=True)
        plt.hexbin(series.real, series.imag, cmap=palette)

    return plot_360_n0sc0pe(plt)
Example #7
0
def missing_dendrogram(data: pd.DataFrame) -> str:
    """Draw the ``missingno`` dendrogram for a DataFrame.

    Args:
      data: Pandas DataFrame whose missing values are plotted.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    # The dendrogram is drawn with twice the font size computed for the data.
    text_size = get_font_size(data) * 2.0
    missingno.dendrogram(data, fontsize=text_size)

    margins = {"left": 0.1, "right": 0.9, "top": 0.7, "bottom": 0.2}
    plt.subplots_adjust(**margins)

    return plot_360_n0sc0pe(plt)
Example #8
0
def histogram(series: pd.Series, series_description: dict,
              bins: Union[int, np.ndarray]) -> str:
    """Draw a histogram of the data.

    Args:
      series: The data to plot.
      series_description: Description of the series, forwarded unchanged to
        ``_plot_histogram``.
      bins: number of bins (int for equal size, ndarray for variable size)

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn histogram
      (annotated as a string).
    """
    hist_plot = _plot_histogram(series, series_description, bins)

    # Rotate the x tick labels by 45 degrees, then tighten the layout.
    x_axis = hist_plot.xaxis
    x_axis.set_tick_params(rotation=45)
    hist_plot.figure.tight_layout()

    return plot_360_n0sc0pe(plt)
Example #9
0
def missing_matrix(data: pd.DataFrame) -> str:
    """Draw the ``missingno`` matrix plot for a DataFrame.

    Args:
      data: Pandas DataFrame whose missing values are plotted.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    force_labels = config["plot"]["missing"]["force_labels"].get(bool)
    primary_rgb = hex_to_rgb(config["html"]["style"]["primary_color"].get(str))
    # Font size is scaled to 16/20 of the size computed for the data.
    text_size = get_font_size(data) / 20 * 16

    missingno.matrix(
        data,
        figsize=(10, 4),
        color=primary_rgb,
        fontsize=text_size,
        sparkline=False,
        labels=force_labels,
    )

    margins = {"left": 0.1, "right": 0.9, "top": 0.7, "bottom": 0.2}
    plt.subplots_adjust(**margins)

    return plot_360_n0sc0pe(plt)
Example #10
0
def missing_bar(data: pd.DataFrame) -> str:
    """Draw the ``missingno`` bar plot for a DataFrame.

    Args:
      data: Pandas DataFrame whose missing values are plotted.

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn plot (annotated as
      a string).
    """
    force_labels = config["plot"]["missing"]["force_labels"].get(bool)
    primary_rgb = hex_to_rgb(config["html"]["style"]["primary_color"].get(str))
    text_size = get_font_size(data)

    missingno.bar(
        data,
        figsize=(10, 5),
        color=primary_rgb,
        fontsize=text_size,
        labels=force_labels,
    )

    # Switch the grid off on every axes of the current figure.
    current_figure = plt.gcf()
    for axes in current_figure.get_axes():
        axes.grid(False)

    margins = {"left": 0.1, "right": 0.9, "top": 0.8, "bottom": 0.3}
    plt.subplots_adjust(**margins)

    return plot_360_n0sc0pe(plt)
Example #11
0
def mini_histogram(series: pd.Series, series_description: dict,
                   bins: Union[int, np.ndarray]) -> str:
    """Draw a small (mini) histogram of the data.

    The plot is created at a figure size of (2, 1.5), with the y axis hidden,
    a white face colour and small, rotated x tick labels.

    Args:
      series: The data to plot.
      series_description: Description of the series, forwarded unchanged to
        ``_plot_histogram``.
      bins: number of bins (int for equal size, ndarray for variable size)

    Returns:
      Whatever ``plot_360_n0sc0pe`` produces for the drawn mini histogram
      (annotated as a string).
    """
    mini_plot = _plot_histogram(
        series, series_description, bins, figsize=(2, 1.5)
    )

    y_axis = mini_plot.axes.get_yaxis()
    y_axis.set_visible(False)
    mini_plot.set_facecolor("w")

    # Shrink the major x tick labels to font size 8 and rotate them.
    x_axis = mini_plot.xaxis
    for major_tick in x_axis.get_major_ticks():
        major_tick.label1.set_fontsize(8)
    mini_plot.xaxis.set_tick_params(rotation=45)

    mini_plot.figure.tight_layout()

    return plot_360_n0sc0pe(plt)