def abplot_CI_bars(N, X, sig_level=0.05, dmin=None):
    """Draw one horizontal confidence-interval bar per variant versus control.

    The first entry of ``N`` and ``X`` is treated as the control group; every
    later entry is a variant that is compared against it.

    Parameters:
        N (list or tuple): sample size of each group, control first
        X (list or tuple): number of conversions of each group, control first
        sig_level (float): significance level handed to ``z_val``
        dmin (float): minimum desired lift; when provided, a red dashed line
            is drawn at ``-dmin`` and a green dashed line at ``dmin``

    Returns:
        None: the bars are drawn on a newly created matplotlib figure.
    """
    # new figure that will hold the bars
    fig, ax = plt.subplots(figsize=(12, 3))

    # the control group sits at index 0
    n_ctrl, x_ctrl = N[0], X[0]

    # (difference in rate, standard error) of each variant against control
    stats = [
        (
            X[i] / N[i] - x_ctrl / n_ctrl,
            pooled_SE(n_ctrl, N[i], x_ctrl, X[i]),
        )
        for i in range(1, len(N))
    ]
    std_errs = np.array([se for _, se in stats])
    diffs = np.array([diff for diff, _ in stats])

    # one row on the y axis per variant
    rows = np.arange(len(N) - 1)

    # half-width of each interval: standard error scaled by the z value
    half_width = std_errs * z_val(sig_level)

    # translucent bar spanning the interval, with a marker at the difference
    ax.hlines(rows, diffs - half_width, diffs + half_width,
              color='blue', alpha=0.35, lw=10, zorder=1)
    ax.scatter(diffs, rows, s=300, marker='|', lw=10, color='magenta',
               zorder=2)

    # solid reference line at zero difference
    ax.axvline(0, c='grey', linestyle='-')

    # optional dashed guides at -dmin (red) and +dmin (green)
    if dmin is not None:
        for position, colour in ((-dmin, 'red'), (dmin, 'green')):
            ax.axvline(position, c=colour, linestyle='--', alpha=0.75)

    # flip the y axis so that variant 1 ends up at the top
    ax.invert_yaxis()

    # name the rows variant1, variant2, ...
    labels = ['variant{}'.format(number) for number in range(1, len(N))]
    plt.yticks(rows, labels)
def abplot(N_A, N_B, bcr, d_hat, sig_level=0.05, show_power=False,
           show_alpha=False, show_beta=False, show_p_value=False,
           show_legend=True):
    """Example plot of AB test

    Draws the null and alternative hypothesis distributions of the difference
    in conversion rate on one figure and displays it with ``plt.show()``.

    Example:
        abplot(N_A=2000, N_B=2000, bcr=0.11, d_hat=0.03)

    Parameters:
        N_A (int): sample size of the control group
        N_B (int): sample size of the test group
        bcr (float): base conversion rate; conversion rate of control
        d_hat (float): difference in conversion rate between the control and
            test groups, sometimes referred to as **minimal detectable
            effect** when calculating minimum sample size or **lift** when
            discussing positive improvement desired from launching a change.
        sig_level (float): significance level passed to ``show_area`` when
            shading
        show_power (bool): shade the area of type 'power'
        show_alpha (bool): shade the area of type 'alpha'
        show_beta (bool): shade the area of type 'beta'
        show_p_value (bool): annotate the plot with the value returned by
            ``p_value``
        show_legend (bool): add a legend to the plot

    Returns:
        None: the function plots an AB test as two distributions for
        visualization purposes
    """
    # create a plot object
    _, ax = plt.subplots(figsize=(12, 6))

    # expected conversions per group, needed for the pooled standard error
    X_A = bcr * N_A
    X_B = (bcr + d_hat) * N_B
    stderr = pooled_SE(N_A, N_B, X_A, X_B)

    # plot the distribution of the null and alternative hypothesis
    plot_null(ax, stderr)
    plot_alt(ax, stderr, d_hat)

    # Set the extent of the plot area symmetrically around zero.  abs() keeps
    # the axis from being flipped when d_hat is negative; when d_hat is zero
    # the limits are left to matplotlib's autoscaling instead of requesting a
    # zero-width range.
    half_width = 3 * abs(d_hat)
    if half_width > 0:
        ax.set_xlim(-half_width, half_width)

    # shade areas according to user input
    requested_areas = (
        (show_power, 'power'),
        (show_alpha, 'alpha'),
        (show_beta, 'beta'),
    )
    for enabled, area_type in requested_areas:
        if enabled:
            show_area(ax, d_hat, stderr, sig_level, area_type=area_type)

    # annotate the p-value next to the peak of the null distribution
    if show_p_value:
        null = ab_dist(stderr, 'control')
        p_val = p_value(N_A, N_B, bcr, bcr + d_hat)
        ax.text(3 * stderr, null.pdf(0),
                'p-value = {0:.3f}'.format(p_val),
                fontsize=12, ha='left')

    # option to show legend
    if show_legend:
        plt.legend()

    plt.xlabel('d')
    plt.ylabel('PDF')
    plt.show()
def funnel_CI_plot(A, B, sig_level=0.05):
    """Draw a confidence interval bar plot for each metric of a funnel

    Entry ``i`` of ``A`` is compared with entry ``i`` of ``B``; each pair
    yields one horizontal bar labelled ``metric1``, ``metric2``, ...

    Parameters:
        A (list of tuples): (sample size, conversions) for control group
            funnel
        B (list of tuples): (sample size, conversions) for test group funnel;
            needs at least as many entries as ``A``
        sig_level (float): significance level handed to ``z_val``

    Returns:
        None: the bars are drawn on a newly created matplotlib figure.
    """
    # initiate plot object
    fig, ax = plt.subplots(figsize=(12, 3))

    # initiate containers for standard error and differences
    SE = []
    d = []

    # iterate through the funnel steps and calculate d and SE for each
    for idx in range(len(A)):
        X_A = A[idx][1]
        N_A = A[idx][0]
        X_B = B[idx][1]
        N_B = B[idx][0]
        d.append(X_B / N_B - X_A / N_A)
        SE.append(pooled_SE(N_A, N_B, X_A, X_B))

    # convert to numpy arrays
    SE = np.array(SE)
    d = np.array(d)

    # one row on the y axis per funnel metric
    y = np.arange(len(A))

    # get z value
    z = z_val(sig_level)

    # confidence interval values
    ci = SE * z

    # bar to represent the confidence interval
    ax.hlines(y, d - ci, d + ci, color='blue', alpha=0.35, lw=10, zorder=1)

    # marker for the difference in conversion rate
    ax.scatter(d, y, s=300, marker='|', lw=10, color='magenta', zorder=2)

    # vertical line to represent 0
    ax.axvline(0, c='grey', linestyle='-')

    # invert y axis to show metric 1 at the top
    ax.invert_yaxis()

    # label metrics on y axis
    labels = ['metric{}'.format(idx + 1) for idx in range(len(A))]
    plt.yticks(np.arange(len(A)), labels)