def approximate_confidence_interval(x, confidence=0.95):
    """Return a normal-approximation confidence interval for the mean of ``x``.

    The interval is ``mean ± z * se`` where ``z`` is the two-sided standard
    normal quantile for ``confidence`` and ``se`` is the standard error
    returned by ``sem``.

    Args:
        x: Sequence or array of numeric samples.
        confidence: Two-sided confidence level, expected in (0, 1).
            Defaults to 0.95, the level that was previously hard-coded.

    Returns:
        A ``(lower, upper)`` tuple bracketing the sample mean.
    """
    # Coerce to float so integer input behaves like the old ``1.0 * array``.
    samples = np.asarray(x, dtype=float)
    centre = np.mean(samples)
    std_err = sem(samples)
    # Two-sided interval: put (1 - confidence) / 2 of the mass in each tail.
    half_width = std_err * norm.ppf((1 + confidence) / 2)
    return centre - half_width, centre + half_width
def get_Z_alpha(confidence):
    """Return the two-sided standard-normal critical value for ``confidence``.

    The confidence level is converted to the upper cumulative probability
    ``1 - (1 - confidence) / 2`` (for example 0.95 -> 0.975) and the standard
    normal quantile at that probability is returned (about 1.96 for 0.95).

    Args:
        confidence: Two-sided confidence level, expected in (0, 1).

    Returns:
        The standard-normal quantile at the upper cumulative probability.
    """
    # Split the excluded mass evenly between both tails.
    upper_cumulative_prob = 1 - (1 - confidence) / 2
    return norm.ppf(upper_cumulative_prob)
def make_plot_probit(title, input_data, x_label):
    """Create a probit plot for data assumed to be log-normal (e.g. EUR).

    A log-normal distribution (location fixed at 0) is fitted to
    ``input_data``. Each input point is plotted at the standard-normal score
    of its rank-based percentile, the fitted CDF is drawn as a line, and the
    P90 / P50 / P10 values read off the fitted CDF are marked with a symbol
    and a vertical dashed segment.

    Args:
        title: Plot title.
        input_data: Positive numeric samples (sequence or array); logarithms
            of the values are taken.
        x_label: Label for the logarithmic x axis.

    Returns:
        The plot object created by ``figure``.
    """
    # Fit a log-normal distribution to the input data (location pinned at 0).
    sigma, _, scale = lognorm.fit(input_data, floc=0)
    mu = math.log(scale)

    # Evaluate the fitted CDF on a grid extending one mean past the data max.
    x = np.linspace(0.001, np.max(input_data) + np.mean(input_data), 1000)
    cdf = (1 + scipy.special.erf((np.log(x) - mu) / (np.sqrt(2) * sigma))) / 2

    p = figure(title=title, background_fill_color="#fafafa", x_axis_type='log')

    # Percentile of each point is rank / len(data), converted to a z score.
    input_data_log = np.log(input_data)
    sort_order = np.argsort(input_data_log)
    ranks = np.empty_like(sort_order)
    ranks[sort_order] = np.arange(len(input_data_log))
    # Divide by n + 1 because the normal quantile at 1 is infinite, which
    # would otherwise occur for the highest ranked value.
    percentiles = (ranks + 1) / (len(sort_order) + 1)
    input_data_y_values = norm.ppf(percentiles)

    # Fitted line expressed as z scores.
    x_y_values = norm.ppf(cdf)

    # NOTE(review): the ``legend=`` glyph keyword is deprecated in newer Bokeh
    # releases in favour of ``legend_label=``; it is left unchanged because
    # the Bokeh version this project targets is not visible from here.
    p.scatter(input_data, input_data_y_values, size=15, line_color="navy",
              legend="Input Data", marker='circle_cross')
    p.line(x, x_y_values, line_width=3, line_color="red", legend="Best Fit")

    # P90 / P50 / P10 are exceedance percentiles, so P90 sits at cumulative
    # probability 0.10 and P10 at 0.90.
    markers = []
    for label, cumulative_prob, color in (("P90", 0.10, 'darkred'),
                                          ("P50", 0.50, 'blue'),
                                          ("P10", 0.90, 'red')):
        # find_nearest is indexed at [1] to obtain the grid position.
        grid_index = find_nearest(cdf, cumulative_prob)[1]
        value = round(x[grid_index])
        markers.append((f"{label} = {int(value)}", value, cumulative_prob, color))

    # Marker symbols first, then the segments, preserving the glyph order.
    for legend_text, value, cumulative_prob, color in markers:
        p.scatter(value, norm.ppf(cumulative_prob), size=15,
                  line_color="black", fill_color=color,
                  legend=legend_text, marker='square_x')

    fit_top = np.max(x_y_values)
    for legend_text, value, _, color in markers:
        p.segment(value, -4, value, fit_top, line_dash='dashed',
                  line_width=2, line_color=color, legend=legend_text)

    # Start the x axis at the power of ten at or below the smallest data
    # point. floor() is used rather than int() truncation so that data
    # smaller than 1 is not cut off by the axis start.
    x_min = math.floor(np.log10(np.min(input_data)))
    power_of_10 = 10 ** x_min

    # Plot styling
    p.x_range.start = power_of_10
    p.y_range.start = -3
    p.legend.location = "top_left"
    p.legend.background_fill_color = "#fefefe"
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = 'Z'
    p.left[0].formatter.use_scientific = False
    p.xaxis[0].formatter = NumeralTickFormatter(format="0,0")
    p.yaxis.visible = False
    p.title.text = title
    p.title.align = 'center'
    p.legend.click_policy = "hide"

    return p
def _solve_power_for_pct(self, pct):
    """Solve for the power estimate whose Stouffer Z matches a normal quantile.

    Searches the interval [0.0501, 0.9999] for the ``est`` at which
    ``self._compute_stouffer_z_at_power(est)`` equals the standard-normal
    quantile of ``pct``.

    Args:
        pct: Cumulative probability, expected in (0, 1), whose
            standard-normal quantile is the target Z value.

    Returns:
        The root returned by ``opt.brentq``.

    Raises:
        ValueError: Propagated from ``opt.brentq`` (assumed to be
            ``scipy.optimize.brentq``) when the error function has the same
            sign at both ends of the bracket.
    """
    target_z = norm.ppf(pct)

    def error(est):
        return self._compute_stouffer_z_at_power(est) - target_z

    # NOTE(review): the bracket presumably runs from just above a 5%
    # significance level up to (almost) full power -- confirm with the caller.
    return opt.brentq(error, .0501, .9999)
def _compute_stouffer_z(pp):
    """Combine probabilities into a single Z score, ignoring NaN entries.

    Each non-NaN value is mapped to its standard-normal quantile; the
    quantiles are summed and divided by the square root of how many there
    were.

    Args:
        pp: Array-like of probabilities; NaN entries are skipped.

    Returns:
        The combined Z score, or NaN when ``pp`` has no non-NaN entries.
    """
    # Accept any array-like (plain lists included), not only ndarrays.
    values = np.asarray(pp, dtype=float)
    valid = values[~np.isnan(values)]
    if valid.size == 0:
        # Nothing to combine: return NaN directly instead of computing 0 / 0.
        return np.nan
    return np.sum(norm.ppf(valid)) / np.sqrt(valid.size)
def gaussian_icdf(h, Xi, x):
    """Return ``Xi + h**2 * z`` where ``z`` is the standard-normal quantile of ``x``.

    Args:
        h: Scale parameter; it enters the result squared.
        Xi: Location added to the scaled quantile.
        x: Probability (scalar or array-like), expected in (0, 1).

    Returns:
        The shifted and scaled normal quantile, with the same shape as the
        broadcast inputs.
    """
    # NOTE(review): if ``h`` is a bandwidth / standard deviation, an inverse
    # Gaussian CDF would normally scale by ``h`` rather than ``h**2`` --
    # confirm the intended convention. Behaviour is left unchanged here.
    return Xi + h**2 * norm.ppf(x)