Ejemplos de show_and_save_plot en Python, ejemplos de utils.plotting.show_and_save_plot en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: scatter_plot.py Proyecto: buidinhan/eda

def scatter_plot(X, Y, x_label="X", y_label="Y", title=None, x_lim=None,
                 y_lim=None, ax=None, show_linear_fitting=False,
                 show=True, save=False, **kwargs):
    """Scatter-plot ``Y`` against ``X``, optionally with a fitted line.

    Parameters
    ----------
    X, Y : values forwarded to ``ax.scatter`` (and to
        ``stats.linregress`` when a fit is requested).
    x_label, y_label, title : axis labels and axes title.
    x_lim, y_lim : limits forwarded to ``set_xlim`` / ``set_ylim``.
    ax : axes to draw on; a new 6x4 figure is created when ``None``.
    show_linear_fitting : when true, overlay the regression line and
        append slope, intercept, r and the residual standard deviation
        to the x-axis label.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.scatter``.
    """
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))

    ax.scatter(X, Y, **kwargs)

    if show_linear_fitting:
        # Only the first three fields of the regression result are needed.
        slope, intercept, r, *_ = stats.linregress(X, Y)
        fitted_Y = slope * X + intercept
        residual_std = residual_standard_deviation(Y, fitted_Y)
        ax.plot(X, fitted_Y)

        # The fit summary goes on extra lines below the x-axis label.
        x_label += ("\nslope={:.3f}, "
                    "intercept={:.3f}"
                    "\nr={:.3f}, "
                    "residual std={:.3f}").format(slope, intercept, r,
                                                  residual_std)

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)

    show_and_save_plot(show=show, save=save, filename="scatter.png")

Ejemplo n.º 2

0

Mostrar archivo

Archivo: probability_plot.py Proyecto: buidinhan/eda

def probability_plot(series,
                     title=None,
                     sparams=(),
                     distribution="norm",
                     ax=None,
                     show_fitting=False,
                     save=False,
                     show=True,
                     **kwargs):
    """Draw a probability plot of ``series`` via ``probplot``.

    Parameters
    ----------
    series : sample data passed to ``probplot``.
    title : axes title.
    sparams : distribution shape parameters (``probplot``'s ``sparams``).
    distribution : distribution name or object (``probplot``'s ``dist``).
    ax : axes to draw on; a new 6x4 figure is created when ``None``.
    show_fitting : when true, put the slope, intercept and r of the
        fitted line in the x-axis label.
    save, show : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``probplot``.

    Returns
    -------
    The value returned by ``probplot`` (called with ``fit=True``).
    """
    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))

    results = probplot(series, sparams=sparams, dist=distribution,
                       fit=True, plot=ax, **kwargs)
    ax.set_title(title)

    if show_fitting:
        # With fit=True the second element holds (slope, intercept, r).
        slope, intercept, r = results[1]
        ax.set_xlabel(("Theoretical quantiles\n"
                       "slope={:.4f}, "
                       "intercept={:.4f}, "
                       "r={:.4f}").format(slope, intercept, r))

    show_and_save_plot(save=save, show=show, filename="probability_plot.png")

    return results

Ejemplo n.º 3

0

Mostrar archivo

def box_cox_normality_set(series, main_title=None, show=True,
                          save=False, box_cox_kws=None, prob_kws=None,
                          original_hist_kws=None,
                          transformed_hist_kws=None):
    """Draw a 2x2 panel assessing a Box-Cox transformation of ``series``.

    Panels: histogram of the original data (top-left), Box-Cox
    normality plot (top-right), histogram of the transformed data
    (bottom-left) and probability plot of the transformed data
    (bottom-right). The data are transformed with the lambda that
    ``box_cox_normality_plot`` reports as giving the highest correlation.

    Parameters
    ----------
    series : iterable of sample values.
    main_title : figure-level title.
    show, save : forwarded to ``show_and_save_plot``.
    box_cox_kws : keyword arguments for ``box_cox_normality_plot``
        (default: lambda range -2..2 with 100 points).
    prob_kws : keyword arguments for ``probability_plot``.
    original_hist_kws, transformed_hist_kws : keyword arguments for the
        two ``histogram`` calls.

    Returns
    -------
    tuple
        ``(lambdas, corrs, optimal_lambda, max_corr)`` as returned by
        ``box_cox_normality_plot``.
    """
    fig, axes = plt.subplots(nrows=2, ncols=2,
                             gridspec_kw={
                                 "left": 0.1, "right": 0.98,
                                 "bottom": 0.1, "top": 0.9,
                                 "wspace": 0.3, "hspace": 0.4,
                                 },
                             figsize=(7, 8))
    org_hist, box_cox = axes[0]
    trans_hist, prob = axes[1]

    fig.suptitle(main_title)

    # Box-Cox Normality Plot
    if box_cox_kws is None:
        box_cox_kws = {"lambda_min": -2, "lambda_max": 2, "N": 100}

    lambdas, corrs, optimal_lambda, max_corr = \
             box_cox_normality_plot(series, ax=box_cox, show=False,
                                    save=False, **box_cox_kws)

    x_label = "λ\nMax CC = {:.3f} at λ = {:.3f}".format(max_corr,
                                                  optimal_lambda)
    box_cox.set_xlabel(x_label)

    # Histogram of the Original Data
    if original_hist_kws is None:
        original_hist_kws = {"title": "Original Data"}

    histogram(series, ax=org_hist, show=False, save=False,
              **original_hist_kws)

    # Transformation of Data (element-wise, using the optimal lambda)
    transformed_series = pd.Series(map(partial(box_cox_transformation,
                                               lambda_=optimal_lambda),
                                      series))

    # Histogram of the Transformed Data
    if transformed_hist_kws is None:
        transformed_hist_kws = {"title": "Transformed Data",
                                "x_label": "Transformed Measure"}

    histogram(transformed_series, ax=trans_hist, show=False, save=False,
              **transformed_hist_kws)

    # Probability Plot of the Transformed Data.
    # This panel must use the transformed values; plotting the original
    # series here would not show the effect of the transformation.
    if prob_kws is None:
        prob_kws = {"title": "Probability Plot"}
    probability_plot(transformed_series, ax=prob, show=False, save=False,
                     **prob_kws)

    show_and_save_plot(show=show, save=save,
                       filename="box_cox_normality_set.png")

    return lambdas, corrs, optimal_lambda, max_corr

Ejemplo n.º 4

0

Mostrar archivo

def autocorrelation_plot(series,
                         max_lag=None,
                         title=None,
                         ax=None,
                         arima=False,
                         show=True,
                         save=False,
                         **kwargs):
    """Plot autocorrelation coefficients of ``series`` with 95%/99% bands.

    Parameters
    ----------
    series : sequence of observations (converted with ``np.array``).
    max_lag : largest lag to plot; defaults to ``len(series) - 2``.
    title : axes title.
    ax : axes to draw on; a new figure is created when ``None``.
    arima : when false, draw constant bands ``z / sqrt(N)``. When true,
        draw lag-dependent bands from Bartlett's formula,
        ``z * sqrt((1 + 2 * sum(r_i**2, i=1..k-1)) / N)``, where ``r_i``
        are the sample autocorrelation coefficients.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for the ``ax.plot`` call that
        draws the coefficients.

    Returns
    -------
    tuple
        ``(coefs, confidence_95, confidence_99)``. The confidence values
        are scalars when ``arima`` is false and arrays (one entry per
        lag) when it is true.
    """
    series = np.array(series)
    N = len(series)

    if max_lag is None:
        max_lag = N - 2  # Lag values of N and N-1 are meaningless.

    if ax is None:
        fig, ax = plt.subplots()

    # Plotting autocorrelation coefficients
    lags = range(0, max_lag + 1)
    coefs = [autocorrelation_coefficient(series, lag) for lag in lags]

    ax.plot(lags, coefs, **kwargs)
    ax.set_title(title)
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation Coefficient")
    ax.set_ylim((-1, 1))

    # Plotting confidence lines
    z_95 = stats.norm.ppf(0.975)  # 0.975 = 1 - 0.05/2
    z_99 = stats.norm.ppf(0.995)  # 0.995 = 1 - 0.01/2

    if arima:
        # Bartlett's formula sums squared *autocorrelations*, not squared
        # data values, so the bands are unit-free like the coefficients.
        squared_coefs = np.asarray(coefs, dtype=float) ** 2
        # running[j] == r_1**2 + ... + r_(j+1)**2 (lag 0 is excluded).
        running = np.cumsum(squared_coefs[1:])
        partial_sums = np.array(
            [running[lag - 2] if lag >= 2 else 0.0 for lag in lags])
        band_scale = np.sqrt((1 + 2 * partial_sums) / N)

        confidence_95 = z_95 * band_scale
        confidence_99 = z_99 * band_scale
        ax.plot(lags, confidence_99, c="k")
        ax.plot(lags, confidence_95, c="k")
        ax.axhline(y=0, c="k")
        ax.plot(lags, -confidence_95, c="k")
        ax.plot(lags, -confidence_99, c="k")
    else:
        confidence_95 = z_95 / np.sqrt(N)
        confidence_99 = z_99 / np.sqrt(N)
        ax.axhline(y=confidence_99)
        ax.axhline(y=confidence_95)
        ax.axhline(y=0, c="k")
        ax.axhline(y=-confidence_95)
        ax.axhline(y=-confidence_99)

    show_and_save_plot(show=show, save=save, filename="autocorrelation.png")

    return coefs, confidence_95, confidence_99

Ejemplo n.º 5

0

Mostrar archivo

def bihistogram(series_1,
                series_2,
                labels=("Series 1", "Series 2"),
                bins=10,
                x_label="Measure",
                y_label="Count",
                title=None,
                edgecolor="k",
                show=True,
                save=False,
                **kwargs):
    """Draw two histograms back to back on a shared x-axis.

    ``series_1`` is drawn in the upper axes and ``series_2`` in the lower
    axes with its y-axis inverted. Both use the same bin edges, computed
    from the pooled data.

    Parameters
    ----------
    series_1, series_2 : the two samples to compare.
    labels : two-item sequence of annotations for the upper and lower
        histograms (immutable default; any indexable sequence works).
    bins : ``bins`` argument for ``np.histogram`` on the pooled data.
    x_label, y_label : axis labels; the x label is put on the lower
        axes only.
    title : figure-level title.
    edgecolor : bar edge colour forwarded to both ``histogram`` calls.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for both ``histogram`` calls.
    """
    fig, axes = plt.subplots(nrows=2,
                             ncols=1,
                             sharex=True,
                             gridspec_kw={
                                 "left": 0.1,
                                 "right": 0.98,
                                 "top": 0.9,
                                 "bottom": 0.1,
                                 "wspace": 0.3,
                                 "hspace": 0,
                             },
                             figsize=(6, 6))
    hist_1, hist_2 = axes

    # Calculation of shared bin edges for the 2 histograms
    all_values = np.append(np.array(series_1), np.array(series_2))
    _, bin_edges = np.histogram(all_values, bins=bins)

    histogram(series_1,
              bins=bin_edges,
              x_label=None,
              y_label=y_label,
              edgecolor=edgecolor,
              ax=hist_1,
              show=False,
              **kwargs)

    histogram(series_2,
              bins=bin_edges,
              x_label=x_label,
              y_label=y_label,
              edgecolor=edgecolor,
              ax=hist_2,
              show=False,
              **kwargs)
    # Flip the lower histogram so its bars grow downwards.
    hist_2.invert_yaxis()

    hist_1.annotate(labels[0], xy=(0.02, 0.9), xycoords="axes fraction")
    hist_2.annotate(labels[1], xy=(0.02, 0.05), xycoords="axes fraction")
    fig.suptitle(title)

    show_and_save_plot(show=show, save=save, filename="bihistogram.png")

Ejemplo n.º 6

0

Mostrar archivo

Archivo: doe_scatter_plot.py Proyecto: buidinhan/eda

def doe_scatter_plot(df,
                     response,
                     factors,
                     x_labels=None,
                     y_label="Response",
                     title=None,
                     show_overall_mean=False,
                     figure_size=(8, 6),
                     show=True,
                     save=False,
                     **kwargs):
    """Scatter the response against each factor, one subplot per factor.

    Factor levels are sorted and placed at positions 0, 1, 2, ... on the
    x-axis, with the original level values as tick labels.

    Parameters
    ----------
    df : DataFrame holding the response and factor columns.
    response : name of the response column.
    factors : sequence of factor column names (one subplot each; a
        single factor is supported).
    x_labels : x-axis labels, one per factor; defaults to ``factors``.
    y_label : y-axis label, set on the left-most subplot.
    title : figure-level title.
    show_overall_mean : when true, draw a horizontal line at the mean of
        the response in every subplot.
    figure_size : ``figsize`` for ``plt.subplots``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.scatter``.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single factor;
    # otherwise plt.subplots would return a bare Axes that cannot be
    # iterated or indexed.
    fig, axes = plt.subplots(nrows=1,
                             ncols=len(factors),
                             sharey=True,
                             squeeze=False,
                             figsize=figure_size,
                             gridspec_kw={
                                 "left": 0.1,
                                 "right": 0.95,
                                 "bottom": 0.1,
                                 "top": 0.95,
                                 "wspace": 0,
                             })
    axes = axes[0]  # the only row
    overall_mean = df[response].mean()

    for factor, ax in zip(factors, axes):
        if show_overall_mean:
            ax.axhline(y=overall_mean)

        factor_levels = np.sort(df[factor].unique())

        # Map every observation's level to its rank among sorted levels.
        encoded_factors = factor_levels.searchsorted(df[factor])
        ax.scatter(encoded_factors, df[response], **kwargs)
        ax.set_xticks(list(range(len(factor_levels))))
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels) - 0.5)

    if x_labels is None:
        x_labels = factors

    for x_label, ax in zip(x_labels, axes):
        ax.set_xlabel(x_label)

    axes[0].set_ylabel(y_label)
    fig.suptitle(title)

    show_and_save_plot(show=show, save=save, filename="doe_scatter_plot.png")

Ejemplo n.º 7

0

Mostrar archivo

def ppcc_plot(series, lambda_min=-5, lambda_max=5, dist="tukeylambda",
              N=100, ax=None, save=False, show=True):
    """Draw a PPCC plot of ``series`` using ``scipy.stats.ppcc_plot``.

    Parameters
    ----------
    series : sample data.
    lambda_min, lambda_max : range of shape values to scan.
    dist : distribution forwarded to ``stats.ppcc_plot``.
    N : number of shape values forwarded to ``stats.ppcc_plot``.
    ax : axes to draw on; a new figure is created when ``None``.
    save, show : forwarded to ``show_and_save_plot``.

    Returns
    -------
    tuple
        ``(svals, ppcc, max_corr_value, max_corr_lambda)``: the two
        arrays from ``stats.ppcc_plot``, the largest correlation, and
        the shape value at which it occurs.
    """
    if ax is None:
        _, ax = plt.subplots()

    shape_values, correlations = stats.ppcc_plot(
        series, lambda_min, lambda_max, dist=dist, plot=ax, N=N)

    show_and_save_plot(show=show, save=save, filename="ppcc.png")

    best_index = correlations.argmax()
    return (shape_values, correlations,
            correlations.max(), shape_values[best_index])

Ejemplo n.º 8

0

Mostrar archivo

def box_cox_normality_plot(series, lambda_min=-2, lambda_max=2, N=100,
                           ax=None, show=True, save=False):
    """Draw a Box-Cox normality plot via ``scipy.stats.boxcox_normplot``.

    Parameters
    ----------
    series : sample data.
    lambda_min, lambda_max : range of lambda values to scan.
    N : number of lambda values forwarded to ``stats.boxcox_normplot``.
    ax : axes to draw on; a new figure is created when ``None``.
    show, save : forwarded to ``show_and_save_plot``.

    Returns
    -------
    tuple
        ``(lambdas, corrs, max_corr_lambda, max_corr_value)``: the two
        arrays from ``stats.boxcox_normplot``, the lambda with the
        largest correlation, and that correlation.
    """
    if ax is None:
        _, ax = plt.subplots()

    lambdas, corrs = stats.boxcox_normplot(
        series, lambda_min, lambda_max, plot=ax, N=N)

    # Locate the peak of the correlation curve.
    peak_value = corrs.max()
    peak_lambda = lambdas[corrs.argmax()]

    show_and_save_plot(show=show, save=save,
                       filename="box_cox_normality.png")

    return lambdas, corrs, peak_lambda, peak_value

Ejemplo n.º 9

0

Mostrar archivo

Archivo: histogram.py Proyecto: buidinhan/eda

def histogram(series,
              bins=10,
              x_label="Measure",
              y_label="Count",
              title=None,
              edgecolor="k",
              ax=None,
              save=False,
              show=True,
              plot_pdf=False,
              show_statistics=False,
              **kwargs):
    """Draw a histogram of ``series`` and return summary statistics.

    Parameters
    ----------
    series : sample data.
    bins : ``bins`` argument for ``ax.hist``.
    x_label, y_label, title : axis labels and axes title.
    edgecolor : bar edge colour.
    ax : axes to draw on; a new 6x4 figure is created when ``None``.
    save, show : forwarded to ``show_and_save_plot``.
    plot_pdf : when true, overlay the curve returned by ``normal_pdf``
        for the sample mean and standard deviation on a twin y-axis.
    show_statistics : when true, append mean, std and range to the
        x-axis label.
    **kwargs : extra keyword arguments for ``ax.hist``.

    Returns
    -------
    tuple
        ``(mean, std, range_)`` where ``std`` uses ``ddof=1``.
    """
    # Summary statistics (always computed because they are returned).
    mean, std = np.mean(series), np.std(series, ddof=1)
    range_ = max(series) - min(series)

    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))

    ax.hist(series, bins=bins, edgecolor=edgecolor, **kwargs)

    if show_statistics:
        # A missing label still gets the statistics line.
        base_label = "" if x_label is None else x_label
        x_label = base_label + \
            "\n(mean={:.4f}, std={:.4f}, range={:.4f})".format(
                mean, std, range_)

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    if plot_pdf:
        # Sample the curve over mean +/- 3 std in steps of 6*std/100.
        grid = np.arange(mean - 3 * std, mean + 3 * std, 6 * std / 100)
        density = normal_pdf(grid, mean=mean, std=std)
        pdf_ax = ax.twinx()
        pdf_ax.plot(grid, density)
        pdf_ax.axis("off")

    show_and_save_plot(save=save, show=show, filename="histogram.png")

    return mean, std, range_

Ejemplo n.º 10

0

Mostrar archivo

Archivo: box_plot.py Proyecto: buidinhan/eda

def box_plot(df, column=None, group=None, x_label=None, y_label=None,
             ax=None, show=True, save=False, **kwargs):
    """Draw a box plot of ``df`` using ``DataFrame.boxplot``.

    Parameters
    ----------
    df : DataFrame to plot.
    column : forwarded as ``column`` to ``df.boxplot``.
    group : forwarded as ``by`` to ``df.boxplot``.
    x_label, y_label : axis labels.
    ax : axes to draw on; a new figure is created when ``None``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``df.boxplot``.
    """
    if ax is None:
        _, ax = plt.subplots()

    df.boxplot(column=column, by=group, ax=ax, **kwargs)

    # Labels are applied after plotting so they replace any set by boxplot.
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    show_and_save_plot(show=show, save=save, filename="box_plot.png")

Ejemplo n.º 11

0

Mostrar archivo

Archivo: bootstrap_plot.py Proyecto: buidinhan/eda

def bootstrap_plot(series, fig=None, size=50, samples=500, show=True,
                   save=False, **kwargs):
    """Draw a bootstrap plot via ``pandas.plotting.bootstrap_plot``.

    Parameters
    ----------
    series : data forwarded to ``pd.plotting.bootstrap_plot``.
    fig : figure to draw on; a new 10x8 figure is created when ``None``.
    size, samples : forwarded unchanged to the pandas function.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for the pandas function.
    """
    target_figure = plt.figure(figsize=(10, 8)) if fig is None else fig

    pd.plotting.bootstrap_plot(
        series, fig=target_figure, size=size, samples=samples, **kwargs)

    show_and_save_plot(show=show, save=save, filename="bootstrap.png")

Ejemplo n.º 12

0

Mostrar archivo

def four_plot(series,
              main_title="4-PLOT",
              show=True,
              save=False,
              run_kws=None,
              lag_kws=None,
              hist_kws=None,
              prob_kws=None):
    """Draw a 2x2 figure: run sequence, lag, histogram, probability plot.

    Parameters
    ----------
    series : sample data shared by all four panels.
    main_title : figure-level title.
    show, save : forwarded to ``show_and_save_plot``.
    run_kws, lag_kws, hist_kws, prob_kws : optional keyword-argument
        dicts for ``run_sequence_plot``, ``lag_plot``, ``histogram`` and
        ``probability_plot`` respectively.
    """
    layout = {
        "left": 0.1,
        "right": 0.98,
        "top": 0.9,
        "bottom": 0.1,
        "wspace": 0.3,
        "hspace": 0.3,
    }
    fig, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw=layout,
                             figsize=(7, 8))
    (run_ax, lag_ax), (hist_ax, prob_ax) = axes

    # Run Sequence Plot: pad the y-range by a tenth of the data range.
    if run_kws is None:
        run_kws = {}
    margin = (max(series) - min(series)) / 10
    padded_y_lim = (min(series) - margin, max(series) + margin)
    run_sequence_plot(series, y_lim=padded_y_lim, ax=run_ax, show=False,
                      **run_kws)

    # Lag Plot
    if lag_kws is None:
        lag_kws = {}
    lag_plot(series, ax=lag_ax, show=False, **lag_kws)

    # Histogram
    if hist_kws is None:
        hist_kws = {}
    histogram(series, ax=hist_ax, show=False, **hist_kws)

    # Probability Plot
    if prob_kws is None:
        prob_kws = {}
    probability_plot(series, ax=prob_ax, show=False, **prob_kws)

    fig.suptitle(main_title)

    show_and_save_plot(show=show, save=save, filename="4-plot.png")

Ejemplo n.º 13

0

Mostrar archivo

def lag_plot(series, lag=1, ax=None, x_lim=None, y_lim=None,
             title="Lag Plot", show=True, save=False, **kwargs):
    """Draw a lag plot of ``series`` via ``pandas.plotting.lag_plot``.

    Parameters
    ----------
    series : data forwarded to ``pd.plotting.lag_plot``.
    lag : lag forwarded to the pandas function.
    ax : axes to draw on; a new figure is created when ``None``.
    x_lim, y_lim : limits forwarded to ``set_xlim`` / ``set_ylim``.
    title : axes title.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for the pandas function.
    """
    if ax is None:
        _, ax = plt.subplots()

    pd.plotting.lag_plot(series, lag=lag, ax=ax, **kwargs)

    ax.set_title(title)
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)

    show_and_save_plot(show=show, save=save, filename="lag_plot.png")

Ejemplo n.º 14

0

Mostrar archivo

Archivo: qq_plot.py Proyecto: buidinhan/eda

def _thin_sorted_sample(sorted_values, target_size):
    """Pick ``target_size`` evenly spaced elements from ``sorted_values``.

    Indices are computed with exact integer arithmetic so the first and
    last elements are always included when ``target_size >= 2``. For a
    single target point the middle element is used.
    """
    last_index = len(sorted_values) - 1
    if target_size == 1:
        # One point cannot span the range; pair it with the middle value.
        return [sorted_values[last_index // 2]]
    return [sorted_values[last_index * i // (target_size - 1)]
            for i in range(target_size)]


def qq_plot(series_1, series_2, x_label="Series 1", y_label="Series 2",
            title="Q-Q Plot", ax=None, show=True, save=False, **kwargs):
    """Draw a quantile-quantile plot comparing two samples.

    Both samples are sorted. When their sizes differ, the larger sample
    is thinned to evenly spaced order statistics so that both plotted
    sequences have the size of the smaller one. A reference line through
    the overall minimum and maximum is drawn with slope 1.

    Parameters
    ----------
    series_1, series_2 : the two samples (any sortable iterables with a
        length).
    x_label, y_label, title : axis labels and axes title.
    ax : axes to draw on; a new figure is created when ``None``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.scatter``.
    """
    if ax is None:
        fig, ax = plt.subplots()

    # Determining the values to be plotted
    N1, N2 = len(series_1), len(series_2)
    series_1 = sorted(series_1)
    series_2 = sorted(series_2)

    plotted_series_1 = series_1
    plotted_series_2 = series_2
    if N1 > N2:
        plotted_series_1 = _thin_sorted_sample(series_1, N2)
    elif N2 > N1:
        plotted_series_2 = _thin_sorted_sample(series_2, N1)

    # Scatter plot
    ax.scatter(plotted_series_1, plotted_series_2, **kwargs)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    # 45-degree reference line
    min_ = min(series_1[0], series_2[0])
    max_ = max(series_1[-1], series_2[-1])
    ax.plot((min_, max_), (min_, max_))

    show_and_save_plot(show=show, save=save, filename="qq_plot.png")

Ejemplo n.º 15

0

Mostrar archivo

Archivo: box_cox_linearity_plot.py Proyecto: buidinhan/eda

def box_cox_linearity_plot(X,
                           Y,
                           lambda_min=-2,
                           lambda_max=2,
                           N=100,
                           title=None,
                           ax=None,
                           show=True,
                           save=False):
    """Plot the X-Y correlation coefficient against the Box-Cox lambda.

    For each of ``N`` evenly spaced lambdas in
    ``[lambda_min, lambda_max]``, ``X`` is transformed with
    ``box_cox_transformation`` and regressed against ``Y``. The lambda
    with the largest squared correlation is reported in the x-axis label.

    Parameters
    ----------
    X, Y : paired data.
    lambda_min, lambda_max, N : definition of the lambda grid.
    title : axes title.
    ax : axes to draw on; a new 6x4 figure is created when ``None``.
    show, save : forwarded to ``show_and_save_plot``.

    Returns
    -------
    tuple
        ``(lambdas, Rs, optimal_lambda, optimal_R)``.
    """
    # Correlation coefficient (third regression field) for every lambda.
    lambdas = np.linspace(lambda_min, lambda_max, N)
    Rs = np.array([
        stats.linregress(box_cox_transformation(X, candidate), Y)[2]
        for candidate in lambdas
    ])

    # The best lambda maximises R squared; the signed R is reported.
    best = (Rs**2).argmax()
    optimal_lambda, optimal_R = lambdas[best], Rs[best]

    if ax is None:
        _, ax = plt.subplots(gridspec_kw={"bottom": 0.2}, figsize=(6, 4))

    ax.scatter(lambdas, Rs, marker="x")
    ax.set_title(title)
    ax.set_ylabel("Correlation Coefficient")
    ax.set_xlabel(
        "λ\n" + "Best R={:.3f} at λ={:.3f}".format(optimal_R, optimal_lambda))

    show_and_save_plot(show=show, save=save, filename="box_cox_linearity.png")

    return lambdas, Rs, optimal_lambda, optimal_R

Ejemplo n.º 16

0

Mostrar archivo

def run_sequence_plot(series, x_label="Index", y_label="Measure",
                      title=None, y_lim=None, ax=None, show=True,
                      save=False, **kwargs):
    """Plot ``series`` against its 1-based observation index.

    Parameters
    ----------
    series : sequence of observations.
    x_label, y_label, title : axis labels and axes title.
    y_lim : limits forwarded to ``set_ylim``.
    ax : axes to draw on; a new figure is created when ``None``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.plot``.
    """
    positions = list(range(1, len(series) + 1))

    if ax is None:
        _, ax = plt.subplots()

    ax.plot(positions, series, **kwargs)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_ylim(y_lim)

    show_and_save_plot(show=show, save=save, filename="run_sequence_plot.png")

Ejemplo n.º 17

0

Mostrar archivo

Archivo: box_cox_linearity_plot.py Proyecto: buidinhan/eda

def box_cox_linearity_set(X,
                          Y,
                          main_title=None,
                          show=True,
                          save=False,
                          box_cox_kws=None,
                          original_lin_kws=None,
                          transformed_lin_kws=None):
    """Draw a panel assessing a Box-Cox transformation of ``X``.

    Panels: scatter plot of the original data (top-left), Box-Cox
    linearity plot (top-right) and scatter plot of the transformed data
    (bottom-left). The bottom-right cell is left blank.

    Parameters
    ----------
    X, Y : paired data.
    main_title : figure-level title.
    show, save : forwarded to ``show_and_save_plot``.
    box_cox_kws : keyword arguments for ``box_cox_linearity_plot``.
    original_lin_kws, transformed_lin_kws : keyword arguments for the
        two ``scatter_plot`` calls.

    Returns
    -------
    tuple
        ``(lambdas, Rs, optimal_lambda, optimal_R)`` as returned by
        ``box_cox_linearity_plot``.
    """
    layout = {
        "left": 0.1,
        "right": 0.98,
        "bottom": 0.15,
        "top": 0.9,
        "wspace": 0.3,
        "hspace": 0.6,
    }
    fig, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw=layout,
                             figsize=(7, 8))
    (org_lin, box_cox), (trans_lin, blank_ax) = axes

    fig.suptitle(main_title)

    # Box-Cox Linearity Plot
    if box_cox_kws is None:
        box_cox_kws = {"title": "Box-Cox Transformation"}

    lambdas, Rs, optimal_lambda, optimal_R = box_cox_linearity_plot(
        X, Y, ax=box_cox, show=False, save=False, **box_cox_kws)

    box_cox.set_xlabel(
        "λ\n" + "Best R={:.3f} at λ={:.3f}".format(optimal_R, optimal_lambda))

    # Linearity Plot of the Original Data
    if original_lin_kws is None:
        original_lin_kws = {"title": "Original Data",
                            "show_linear_fitting": True}
    scatter_plot(X, Y, ax=org_lin, show=False, save=False, **original_lin_kws)

    # Linearity Plot of the Transformed Data
    if transformed_lin_kws is None:
        transformed_lin_kws = {"title": "Transformed Data",
                               "show_linear_fitting": True}
    scatter_plot(box_cox_transformation(X, optimal_lambda), Y,
                 ax=trans_lin, show=False, save=False,
                 **transformed_lin_kws)

    # Hiding the empty plot in the bottom-right corner
    blank_ax.axis("off")

    show_and_save_plot(show=show, save=save,
                       filename="box_cox_linearity_set.png")

    return lambdas, Rs, optimal_lambda, optimal_R

Ejemplo n.º 18

0

Mostrar archivo

def doe_statistic_matrix(df, response, factors, statistic="mean",
                         y_label=None, title=None,
                         show_overall_statistic=False,
                         figure_size=(8, 6), show=True, save=False,
                         **kwargs):
    """Plot a response statistic per factor level in a matrix of panels.

    Diagonal cells show the statistic for each level of one factor.
    Cells above the diagonal show it for each level of the element-wise
    product of two factors. Cells below the diagonal are hidden.

    Parameters
    ----------
    df : DataFrame holding the response and factor columns.
    response : name of the response column.
    factors : sequence of factor column names.
    statistic : ``"mean"``, ``"median"`` or ``"std"``.
    y_label : y-axis label for the top-left panel; defaults to
        ``"<Statistic> of Response"``.
    title : figure-level title.
    show_overall_statistic : when true, draw a horizontal line at the
        statistic of the whole response column in every visible panel.
    figure_size : ``figsize`` for ``plt.subplots``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.plot``.

    Raises
    ------
    ValueError
        If ``statistic`` is not one of the supported names.
    TypeError
        If two factor columns cannot be multiplied together.
    """
    # Validate before creating any figure so bad input leaves no figure.
    if statistic not in ("mean", "median", "std"):
        raise ValueError("*statistic* should be 'mean', 'median', or 'std'.")

    def summarize(values):
        # The statistic names match the pandas Series method names.
        return getattr(values, statistic)()

    n_factors = len(factors)

    fig, axes = plt.subplots(nrows=n_factors, ncols=n_factors,
                             squeeze=False, sharey=True,
                             figsize=figure_size,
                             gridspec_kw={"left": 0.1, "right": 0.95,
                                          "bottom": 0.05, "top": 0.9,
                                          "wspace": 0, "hspace": 0.3})

    overall_statistic = summarize(df[response])

    for row, col in product(range(n_factors), range(n_factors)):
        ax = axes[row, col]

        if row > col:  # Skip the cell
            ax.axis("off")
            continue

        if row == col:  # Single-factor panel
            grouping = df[factors[row]]
            panel_label = factors[row]
        else:  # Two-factor (interaction) panel
            factor_1, factor_2 = factors[row], factors[col]
            # Only the multiplication is guarded, so unrelated TypeErrors
            # (e.g. from plot kwargs) are not mislabelled.
            try:
                grouping = df[factor_1] * df[factor_2]
            except TypeError as err:
                raise TypeError(
                    "Factor levels should be encoded as integers.") from err
            panel_label = "{}*{}".format(factor_1, factor_2)

        if show_overall_statistic:
            ax.axhline(y=overall_statistic)

        factor_levels = np.sort(grouping.unique())
        stat_by_level = [summarize(df.loc[grouping == level, response])
                         for level in factor_levels]

        # Sorted unique levels sit at positions 0, 1, 2, ...
        positions = list(range(len(factor_levels)))
        ax.plot(positions, stat_by_level, **kwargs)
        ax.annotate(panel_label, xy=(0.5, 1.02), xycoords="axes fraction",
                    horizontalalignment="center")
        ax.set_xticks(positions)
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels)-0.5)

    if y_label is None:
        y_label = statistic.title() + " of Response"

    axes[0, 0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save,
                       filename="doe_statistic_matrix.png")

Ejemplo n.º 19

0

Mostrar archivo

Archivo: block_plot.py Proyecto: buidinhan/eda

def block_plot(df,
               response_name,
               plot_factor,
               grouping_factors,
               x_label=None,
               y_label=None,
               plot_factor_label=None,
               title=None,
               ax=None,
               figure_size=(6, 6),
               show=True,
               save=False):
    """Draw a block plot of mean responses.

    Each combination of grouping-factor levels gets one block (a bar
    from the lowest to the highest mean response across plot-factor
    levels), annotated with the plot-factor level at each mean.

    Parameters
    ----------
    df : DataFrame holding the response and factor columns.
    response_name : name of the response column.
    plot_factor : column whose levels are compared inside each block.
    grouping_factors : sequence of columns whose level combinations
        define the blocks.
    x_label, y_label : axis labels; defaults are derived from the
        grouping factors and ``"Average Response"``.
    plot_factor_label : optional legend-style annotation text.
    title : axes title.
    ax : axes to draw on; a new figure is created when ``None``.
    figure_size : ``figsize`` used when a figure is created.
    show, save : forwarded to ``show_and_save_plot``.
    """
    # Levels of the plot factor
    plot_levels = np.sort(df[plot_factor].unique())

    # Sorted levels of every grouping factor, then their Cartesian product
    levels_by_factor = {
        factor: np.sort(df[factor].unique()) for factor in grouping_factors
    }
    combinations = list(itertools.product(*levels_by_factor.values()))

    # Mean response for every (grouping levels..., plot level) cell
    relevant_columns = df[[response_name, plot_factor, *grouping_factors]]
    grouped_response_means = relevant_columns.groupby(
        [*grouping_factors, plot_factor]).mean()

    # Per combination: plot level -> mean response. Collection stops at
    # the first plot level that has no data for the combination.
    response_means_by_combination = {}
    for combination in combinations:
        level_means = {}
        response_means_by_combination[combination] = level_means
        for plot_level in plot_levels:
            try:
                cell_mean = grouped_response_means.loc[
                    (*combination, plot_level), response_name]
            except KeyError:
                break
            level_means[plot_level] = cell_mean

    # Plotting
    if ax is None:
        margins = {"bottom": 0.15, "top": 0.95, "left": 0.1, "right": 0.96}
        _, ax = plt.subplots(gridspec_kw=margins, figsize=figure_size)

    for index, combination in enumerate(combinations):
        level_means = response_means_by_combination[combination]

        # A block needs at least two means to have an extent.
        if len(level_means) <= 1:
            continue

        low = min(level_means.values())
        high = max(level_means.values())
        ax.bar(index, high - low, width=0.5, bottom=low, align="center",
               edgecolor="k", color="w")

        for plot_level, mean_at_this_level in level_means.items():
            # Means strictly inside the block get a marker; the extremes
            # are already shown by the block edges.
            if mean_at_this_level not in (low, high):
                ax.plot([index], [mean_at_this_level], marker="^", color="k")

            ax.annotate(str(plot_level),
                        xy=(index, mean_at_this_level),
                        xycoords="data",
                        xytext=(-2, 1),
                        textcoords="offset points")

    if plot_factor_label is not None:
        ax.annotate("Plot Character = " + plot_factor_label,
                    xy=(0.01, 0.97),
                    xycoords="axes fraction")

    ax.set_xticks(list(range(len(combinations))))

    several_grouping_factors = len(grouping_factors) > 1
    if several_grouping_factors:
        ax.set_xticklabels([str(combination) for combination in combinations],
                           rotation=60)
    else:
        ax.set_xticklabels(
            [str(combination[0]) for combination in combinations])

    if x_label is None:
        if several_grouping_factors:
            x_label = "(" + ", ".join(grouping_factors) + ")"
        else:
            x_label = grouping_factors[0]

    if y_label is None:
        y_label = "Average Response"

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    show_and_save_plot(show=show, save=save, filename="block_plot.png")

Ejemplo n.º 20

0

Mostrar archivo

def doe_statistic_plot(df, response, factors, statistic="mean",
                       x_labels=None, y_label=None, title=None,
                       show_overall_statistic=False, figure_size=(8, 6),
                       show=True, save=False, **kwargs):
    """Plot a response statistic per factor level, one subplot per factor.

    Parameters
    ----------
    df : DataFrame holding the response and factor columns.
    response : name of the response column.
    factors : sequence of factor column names (one subplot each; a
        single factor is supported).
    statistic : ``"mean"``, ``"median"`` or ``"std"``.
    x_labels : x-axis labels, one per factor; defaults to ``factors``.
    y_label : y-axis label for the left-most subplot; defaults to
        ``"<Statistic> of Response"``.
    title : figure-level title.
    show_overall_statistic : when true, draw a horizontal line at the
        statistic of the whole response column in every subplot.
    figure_size : ``figsize`` for ``plt.subplots``.
    show, save : forwarded to ``show_and_save_plot``.
    **kwargs : extra keyword arguments for ``ax.plot``.

    Raises
    ------
    ValueError
        If ``statistic`` is not one of the supported names.
    """
    # Validate before creating any figure so bad input leaves no figure.
    if statistic not in ("mean", "median", "std"):
        raise ValueError("*statistic* should be 'mean', 'median', or 'std'.")

    def summarize(values):
        # The statistic names match the pandas Series method names.
        return getattr(values, statistic)()

    # squeeze=False keeps `axes` two-dimensional even for a single factor;
    # otherwise plt.subplots would return a bare Axes that cannot be
    # iterated or indexed.
    fig, axes = plt.subplots(nrows=1, ncols=len(factors),
                             sharey=True, squeeze=False,
                             figsize=figure_size,
                             gridspec_kw={"left": 0.1, "right": 0.95,
                                          "bottom": 0.1, "top": 0.95,
                                          "wspace": 0,})
    axes = axes[0]  # the only row

    overall_statistic = summarize(df[response])

    for factor, ax in zip(factors, axes):
        if show_overall_statistic:
            ax.axhline(y=overall_statistic)

        factor_levels = np.sort(df[factor].unique())
        stat_by_level = [summarize(df.loc[df[factor] == level, response])
                         for level in factor_levels]

        # Sorted unique levels sit at positions 0, 1, 2, ...
        positions = list(range(len(factor_levels)))
        ax.plot(positions, stat_by_level, **kwargs)
        ax.set_xticks(positions)
        ax.set_xticklabels(factor_levels)
        ax.set_xlim(-0.5, len(factor_levels)-0.5)

    if x_labels is None:
        x_labels = factors

    for x_label, ax in zip(x_labels, axes):
        ax.set_xlabel(x_label)

    if y_label is None:
        y_label = statistic.title() + " of Response"

    axes[0].set_ylabel(y_label)
    fig.suptitle(title)

    show_and_save_plot(show=show, save=save,
                       filename="doe_statistic_plot.png")

Ejemplo n.º 21

0

Mostrar archivo

def weibull_plot(series, title=None, x_label="Measure",
                 y_label="Weibull Probability (%)", ax=None, show=True,
                 save=False, **kwargs):
    """Draw a Weibull probability plot of *series* with a fitted line.

    The sorted observations are placed on a log-scaled x-axis against
    ``ln(-ln(1 - p))``, where ``p = (i - 0.3) / (n + 0.4)`` is the plotting
    position of the i-th smallest of the n observations.  A least-squares
    line is fitted to ``log10(value)`` versus that quantity, and the fit
    results are appended to the x-axis label.

    Parameters
    ----------
    series : array-like
        Observations to plot.  Their logarithm is taken, so they are
        expected to be positive.
    title : str, optional
        Axes title.
    x_label : str
        First line of the x-axis label; the fit summary is appended to it.
    y_label : str
        y-axis label.
    ax : matplotlib Axes, optional
        Axes to draw on.  When omitted a new 6x6 figure is created.
    show, save : bool
        Passed through to ``show_and_save_plot`` (output file name
        ``"weibull_plot.png"``).
    **kwargs
        Passed through to ``ax.scatter``.

    Returns
    -------
    tuple
        ``(slope, intercept, r, shape_parameter, scale_parameter)`` where
        ``shape_parameter = slope / ln(10)`` and
        ``scale_parameter = 10 ** (-intercept / slope)``.
    """
    # Calculation of X and Y coordinates and fitting line parameters.
    # Sort once and reuse the result everywhere: the i-th plotting position
    # belongs to the i-th smallest observation, so both the regression and
    # the scatter must use the values in ascending order.
    sorted_series = np.sort(series)
    X = np.log10(sorted_series)

    n = len(sorted_series)
    i = np.arange(1, n+1)
    p = (i-0.3) / (n+0.4)
    Y = np.log(-np.log(1-p))

    slope, intercept, r, _, _ = stats.linregress(X, Y)
    X_for_fitting = np.linspace((-6.908-intercept)/slope, # p=0.001, y=-6.908
                                (1.933-intercept)/slope,  # p=0.999, y=1.933
                                10)
    series_for_fitting = 10 ** X_for_fitting
    Y_for_fitting = intercept + slope*X_for_fitting

    shape_parameter = slope / np.log(10) # beta
    scale_parameter = 10**(-intercept/slope) # eta

    # Plotting
    if ax is None:
        fig, ax = plt.subplots(gridspec_kw={"bottom": 0.2},
                               figsize=(6, 6))
    ax.scatter(sorted_series, Y, **kwargs)
    ax.plot(series_for_fitting, Y_for_fitting)
    ax.set_title(title)

    # The y-axis is linear in ln(-ln(1-p)) but labelled in percent.
    percentage_ticks = np.array([0.1, 0.5, 1, 5, 10, 20, 30, 40, 50, 60,
                                 70, 80, 90, 95, 99, 99.9])
    p_ticks = percentage_ticks / 100
    y_ticks = np.log(-np.log(1-p_ticks))
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(percentage_ticks)
    ax.set_ylabel(y_label)
    ax.set_ylim((y_ticks[0], y_ticks[-1]))

    ax.set_xscale("log")
    # Pad 1.5 decades below the smallest and 0.5 decade above the largest
    # observation.
    ax.set_xlim((sorted_series[0]/10**1.5, sorted_series[-1]*10**0.5))
    x_label += (
        "\nr={:.3f}, slope={:.3f}, intercept={:.3f}\n"
        "shape={:.3f}, scale={:.3f}"
    ).format(r, slope, intercept, shape_parameter, scale_parameter)
    ax.set_xlabel(x_label)

    # Horizontal and Vertical Lines
    # y = 0 is where 1 - p = 1/e, i.e. p = 63.2 %; the fitted line crosses
    # that level at x = scale_parameter.
    ax.axhline(y=0)
    ax.axvline(x=scale_parameter)
    ax.annotate("63.2", xy=(0.005, 0.05),
                xycoords=("axes fraction", "data"))
    ax.annotate("{:.3f}".format(scale_parameter),
                xy=(scale_parameter, 0.01),
                xycoords=("data", "axes fraction"))

    show_and_save_plot(show=show, save=save, filename="weibull_plot.png")

    return slope, intercept, r, shape_parameter, scale_parameter

Ejemplo n.º 22

0

Mostrar archivo

Archivo: doe_scatter_plot.py Proyecto: buidinhan/eda

def _scatter_encoded_levels(ax, values, response_values, caption,
                            scatter_kwargs):
    """Scatter *response_values* against the ordinal position of *values*.

    The distinct entries of *values* are sorted and mapped to 0, 1, 2, ...
    so the levels are evenly spaced along the x-axis.  Ticks are labelled
    with the original level values and *caption* is written centred just
    above the axes.  *scatter_kwargs* is a dict passed to ``ax.scatter``.
    """
    levels = np.sort(values.unique())
    # levels -> 0, 1, 2, ...
    positions = levels.searchsorted(values)

    ax.scatter(positions, response_values, **scatter_kwargs)
    ax.annotate(caption,
                xy=(0.5, 1.02),
                xycoords="axes fraction",
                horizontalalignment="center")

    ax.set_xticks(list(range(len(levels))))
    ax.set_xticklabels(levels)
    ax.set_xlim(-0.5, len(levels) - 0.5)


def doe_scatter_matrix(df,
                       response,
                       factors,
                       y_label="Response",
                       title=None,
                       figure_size=(8, 6),
                       show=True,
                       save=False,
                       **kwargs):
    """Draw an upper-triangular matrix of response-versus-factor scatters.

    A ``len(factors)`` x ``len(factors)`` grid of axes sharing the y-axis
    is created.  Diagonal cells plot ``df[response]`` against the levels of
    one factor; cells above the diagonal plot it against the levels of the
    element-wise product of two factors; cells below the diagonal are
    switched off.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the *response* and *factors* columns.
    response : str
        Name of the response column.
    factors : sequence of str
        Names of the factor columns.
    y_label : str
        y-axis label, set on the top-left axes.
    title : str, optional
        Figure title.
    figure_size : tuple
        Figure size passed to ``plt.subplots``.
    show, save : bool
        Passed through to ``show_and_save_plot`` (output file name
        ``"doe_scatter_matrix.png"``).
    **kwargs
        Passed through to every ``ax.scatter`` call.

    Raises
    ------
    TypeError
        If two factor columns cannot be multiplied together.
    """
    n_factors = len(factors)

    fig, axes = plt.subplots(nrows=n_factors,
                             ncols=n_factors,
                             squeeze=False,
                             sharey=True,
                             figsize=figure_size,
                             gridspec_kw={
                                 "left": 0.1,
                                 "right": 0.95,
                                 "bottom": 0.05,
                                 "top": 0.9,
                                 "wspace": 0,
                                 "hspace": 0.3
                             })

    for row, col in product(range(n_factors), repeat=2):
        ax = axes[row, col]

        if row > col:  # Skip the cell
            ax.axis("off")
            continue

        if row == col:  # Single-factor scatter plot
            factor = factors[row]
            _scatter_encoded_levels(ax, df[factor], df[response], factor,
                                    kwargs)
            continue

        # Double-factor scatter plot
        factor_1 = factors[row]
        factor_2 = factors[col]

        # Only the multiplication is guarded, so a TypeError raised by the
        # plotting calls (e.g. a bad scatter keyword) is not misreported as
        # a factor-encoding problem.
        try:
            combined_factor = df[factor_1] * df[factor_2]
        except TypeError as err:
            raise TypeError(
                "Factor levels should be encoded as integers.") from err

        _scatter_encoded_levels(ax, combined_factor, df[response],
                                "{}*{}".format(factor_1, factor_2), kwargs)

    axes[0, 0].set_ylabel(y_label)
    fig.suptitle(title)
    show_and_save_plot(show=show, save=save, filename="doe_scatter_matrix.png")