def generate_t_distribution_chart_data(alpha, t_stat, n_1, n_2, x_bar_1, x_bar_2, s_1, s_2):
    """Build the chart payload for the t distribution under the null hypothesis.

    The curve is the t density with Welch's degrees of freedom evaluated on a
    fixed grid over [-5, 5). A second series repeats the density only outside
    the two-sided critical values (``None`` in between). A vertical line marks
    the grid point closest to ``t_stat``.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset``.
    """
    welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
    d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)

    # Two-sided critical values, symmetric about zero.
    upper_crit = t.ppf(1 - alpha/2, df=welches_df)
    lower_crit = -upper_crit

    # Fixed plotting window for the x axis.
    lo, hi = -5, 5
    grid = list(np.arange(lo, hi, (hi - lo) / 501))
    tick_labels = ["{:.3f}".format(point) for point in grid]

    null_density = [t.pdf(point, df=welches_df) for point in grid]
    # Density is kept only in the rejection region; the rest is left as gaps.
    rejection_density = [
        None if lower_crit <= point <= upper_crit else density
        for point, density in zip(grid, null_density)
    ]

    return {
        "title": "t-statistic distribution (effect size: {:.3f})".format(d),
        "xAxisLabel": "t",
        "yAxisLabel": "Density",
        "labels": tick_labels,
        "verticalLine": {
            "position": "{:.3f}".format(utils.find_closest_value(grid, t_stat)),
            "label": "t statistic: {:.3f}".format(t_stat)
        },
        "dataset": [
            {
                "label": "H0",
                "data": null_density,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": None
            },
            {
                "label": "H0",
                "data": rejection_density,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            }
        ]
    }
def calculate_power_from_means(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Compute one- and two-sided power from group means, SDs and sizes.

    The critical values come from the central t distribution with Welch's
    degrees of freedom. Power is the upper-tail probability beyond each
    critical value under a noncentral t distribution.

    Returns:
        ``[[power_one_sided], [power_two_sided]]``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    t_crit_os = t.ppf(q=1 - alpha, df=df)
    t_crit_ts = t.ppf(q=1 - alpha / 2, df=df)

    # Create Non-Centralized t-distribution.
    # The magnitude of d is used so that the result does not depend on which
    # group has the larger mean; this matches create_power_from_means_formula,
    # which displays the same non-centrality parameter with abs(d).
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    nc = abs(d) * (2 / (1 / n_1 + 1 / n_2) / 2)**0.5
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)

    power_os = 1 - nct_dist.cdf(x=t_crit_os)
    power_ts = 1 - nct_dist.cdf(x=t_crit_ts)
    return [[power_os], [power_ts]]
def create_p_value_from_means_formula(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2):
    """Return the LaTeX steps that derive the two-sided p-value from means.

    Steps, in order: the symbolic t formula, the formula with the inputs
    substituted, the evaluated t statistic, and the two-sided p-value using
    Welch's degrees of freedom.

    Returns:
        list of four LaTeX strings.
    """
    formulae = []

    # Every template below escapes braces for str.format, so the symbolic
    # step is formatted too; otherwise it is emitted with doubled braces.
    step_1 = "t = \\frac{{|\\bar{{x_1}} - \\bar{{x_2}}|}}{{\\sqrt{{\\frac{{s_1^2}}{{n_1}} + \\frac{{s_2^2}}{{n_2}}}}}}"
    formulae.append(step_1.format())

    step_2 = "t = \\frac{{|{:.3f} - {:.3f}|}}{{\\sqrt{{\\frac{{{:.3f}^2}}{{{}}} + \\frac{{{:.3f}^2}}{{{}}}}}}}"
    formulae.append(step_2.format(x_bar_1, x_bar_2, s_1, n_1, s_2, n_2))

    step_3 = "t = \\frac{{{:.3f}}}{{{:.3f}}} = {:.3f}"
    numerator = abs(x_bar_1 - x_bar_2)
    denominator = (s_1**2 / n_1 + s_2**2 / n_2)**0.5
    t_stat = numerator / denominator
    formulae.append(step_3.format(numerator, denominator, t_stat))

    # The survival function avoids the cancellation in 1 - cdf for large t.
    df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
    p_value = 2 * t.sf(t_stat, df=df)
    step_4 = "p = 2 \\times P(T > {:.3f}) = {:.3f}"
    formulae.append(step_4.format(t_stat, p_value))

    return formulae
def create_power_from_means_formula(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Return the LaTeX steps that derive two-sided power from means.

    Steps, in order: the two-sided critical t value, the definition of beta
    under a noncentral t distribution, the evaluated beta, and power as
    ``1 - beta``. Welch's degrees of freedom are truncated to an integer.

    Returns:
        list of four LaTeX strings.
    """
    formulae = []

    df = int(utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2))
    sig = 1 - alpha/2
    t_crit = t.ppf(q=sig, df=df)

    # "\\ " is the LaTeX forced space. It is written with an escaped backslash
    # because a bare "\ " is an invalid escape sequence in a normal string.
    step_1 = "t_{{crit}} = t_{{1-\\alpha/2, \\ \\upsilon}} = t_{{{:.3f}, \\ {}}} = {:.3f}"
    formulae.append(step_1.format(sig, df, t_crit))

    step_2 = "\\beta = P(T <= t_{{crit}})\\ where\\ T\\ \\sim\\ t_{{\\upsilon={},\\ \\mu={:.3f}}}"
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    nc = abs(d) * (2 / (1/n_1 + 1/n_2) / 2)**0.5
    formulae.append(step_2.format(df, nc))

    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    beta = nct_dist.cdf(x=t_crit)
    step_3 = "\\beta = P(T <= {:.3f}) = {:.3f}"
    formulae.append(step_3.format(t_crit, beta))

    step_4 = "1 - \\beta = 1 - {:.3f} = {:.3f}"
    formulae.append(step_4.format(beta, 1 - beta))

    return formulae
def _populate_sample_size_results(inputs, sample_fields, results):
    """Fill ``results`` in place for the "sample-size" target."""
    alpha = float(inputs['alpha'])
    power = float(inputs['power'])
    enrolment_ratio = float(inputs['enrolmentRatio'])

    # Stats and Formulas
    d = None
    if utils.all_sample_info_provided(sample_fields):
        mu_1 = float(sample_fields[0]['mean'])
        mu_2 = float(sample_fields[1]['mean'])
        sigma_1 = float(sample_fields[0]['stdDev'])
        sigma_2 = float(sample_fields[1]['stdDev'])
        results['statistics'] = calculate_sample_size_from_means(
            mu_1=mu_1, mu_2=mu_2, sigma_1=sigma_1, sigma_2=sigma_2,
            alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
        results['formulae'] = create_sample_size_from_means_formula(
            mu_1=mu_1, mu_2=mu_2, sigma_1=sigma_1, sigma_2=sigma_2,
            alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
    else:
        # Only an effect size was given: use unit-variance groups d apart.
        d = float(inputs['effectSize'])
        mu_1 = 0
        mu_2 = mu_1 + d
        sigma_1, sigma_2 = 1, 1
        results['statistics'] = calculate_sample_size_from_cohens_d(
            d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
        results['formulae'] = create_sample_size_from_d_formula(
            d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)

    # Calculate vars
    # NOTE(review): column index 1 is presumably the two-sided result, going
    # by the column labels below - confirm against the calculate_* helpers.
    n_1 = results['statistics'][0][1]
    n_2 = results['statistics'][1][1]
    if d is None:
        # With explicit means, d can only be derived once n_1/n_2 are known.
        d = utils.calculate_cohens_d(mu_1=mu_1, sigma_1=sigma_1, n_1=n_1,
                                     mu_2=mu_2, sigma_2=sigma_2, n_2=n_2)

    # Notes
    results['notes'] = generate_sample_size_notes(alpha, power)
    results['chartText'] = generate_power_distributions_text(
        d=d, mu_1=mu_1, n_1=n_1, mu_2=mu_2, n_2=n_2, alpha=alpha, power=power)

    # Charts
    charts = results['charts']
    charts['chartOne'] = generate_power_vs_sample_size_chart_data(
        d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
    charts['chartTwo'] = generate_effect_size_vs_sample_size_chart_data(
        d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
    charts['chartThree'] = generate_sampling_distributions_chart_data(
        mu_1=mu_1, mu_2=mu_2, sigma_1=sigma_1, sigma_2=sigma_2,
        n_1=n_1, n_2=n_2, alpha=alpha)

    # Labels
    results['labels'] = {
        "columns": ["", "One-sided test", "Two-sided test"],
        "rows": [
            "Sample 1 (n<sub>1</sub>)",
            "Sample 2 (n<sub>2</sub>)",
            "All Samples (n<sub>1</sub> + n<sub>2</sub>)"
        ],
    }


def _populate_power_results(inputs, sample_fields, results):
    """Fill ``results`` in place for the "power" target."""
    n_1 = int(sample_fields[0]['n'])
    n_2 = int(sample_fields[1]['n'])
    alpha = float(inputs['alpha'])

    # Statistics and Formulas
    if utils.all_sample_info_provided(sample_fields):
        mu_1 = float(sample_fields[0]['mean'])
        mu_2 = float(sample_fields[1]['mean'])
        sigma_1 = float(sample_fields[0]['stdDev'])
        sigma_2 = float(sample_fields[1]['stdDev'])
        d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
        results['statistics'] = calculate_power_from_means(
            mu_1=mu_1, sigma_1=sigma_1, n_1=n_1,
            mu_2=mu_2, sigma_2=sigma_2, n_2=n_2, alpha=alpha)
        results['formulae'] = create_power_from_means_formula(
            mu_1=mu_1, sigma_1=sigma_1, n_1=n_1,
            mu_2=mu_2, sigma_2=sigma_2, n_2=n_2, alpha=alpha)
    else:
        # Only an effect size was given: use unit-variance groups d apart.
        d = float(inputs['effectSize'])
        mu_1 = 0
        mu_2 = mu_1 + d
        sigma_1, sigma_2 = 1, 1
        results['statistics'] = calculate_power_from_cohens_d(
            d=d, n_1=n_1, n_2=n_2, alpha=alpha)
        results['formulae'] = create_power_from_d_formula(
            d=d, n_1=n_1, n_2=n_2, alpha=alpha)

    # Calculate vars: row 1 is the two-sided test (see the row labels below).
    power = results['statistics'][1][0]

    # Notes
    welches_df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    results['notes'] = generate_power_notes(alpha=alpha, df=welches_df)
    results['chartText'] = generate_power_distributions_text(
        d=d, mu_1=mu_1, n_1=n_1, mu_2=mu_2, n_2=n_2, alpha=alpha, power=power)

    # Charts
    charts = results['charts']
    charts['chartOne'] = generate_sample_size_vs_power_chart_data(
        d=d, alpha=alpha, power=power, n_1=n_1, n_2=n_2)
    charts['chartTwo'] = generate_effect_size_vs_power_chart_data(
        d=d, alpha=alpha, n_1=n_1, n_2=n_2)
    charts['chartThree'] = generate_sampling_distributions_chart_data(
        mu_1=mu_1, mu_2=mu_2, sigma_1=sigma_1, sigma_2=sigma_2,
        n_1=n_1, n_2=n_2, alpha=alpha)

    # Labels
    results['labels'] = {
        "columns": ["Test type", "Statistical Power (1 - β)"],
        "rows": ["One-sided test", "Two-sided test"],
    }


def _populate_min_effect_results(inputs, sample_fields, results):
    """Fill ``results`` in place for the "min-effect" target."""
    n_1 = int(sample_fields[0]['n'])
    n_2 = int(sample_fields[1]['n'])
    alpha = float(inputs['alpha'])
    power = float(inputs['power'])

    results['statistics'] = calculate_min_effect_size(
        n_1=n_1, n_2=n_2, alpha=alpha, power=power)
    results['formulae'] = create_min_effect_size_formula(
        n_1=n_1, n_2=n_2, alpha=alpha, power=power)

    # Calculate Vars: row 1 is the two-sided test (see the row labels below).
    d = results['statistics'][1][0]
    mu_1 = 0
    mu_2 = mu_1 + d
    sigma_1, sigma_2 = 1, 1

    # Notes
    welches_df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    results['notes'] = generate_min_effect_size_notes(
        alpha=alpha, power=power, df=welches_df)
    results['chartText'] = generate_power_distributions_text(
        d=d, mu_1=mu_1, n_1=n_1, mu_2=mu_2, n_2=n_2, alpha=alpha, power=power)

    # Charts
    charts = results['charts']
    charts['chartOne'] = generate_sample_size_vs_effect_size_data(
        d=d, alpha=alpha, power=power, n_1=n_1, n_2=n_2)
    charts['chartTwo'] = generate_power_vs_effect_size_data(
        d=d, alpha=alpha, power=power, n_1=n_1, n_2=n_2)
    charts['chartThree'] = generate_sampling_distributions_chart_data(
        mu_1=mu_1, mu_2=mu_2, sigma_1=sigma_1, sigma_2=sigma_2,
        n_1=n_1, n_2=n_2, alpha=alpha)

    # Labels
    results['labels'] = {
        "columns": ["Test type", "Minimum effect size"],
        "rows": ["One-sided test", "Two-sided test"],
    }


def _populate_t_stat_results(inputs, sample_fields, results):
    """Fill ``results`` in place for the "t-stat" target."""
    n_1 = int(sample_fields[0]['n'])
    n_2 = int(sample_fields[1]['n'])
    alpha = float(inputs['alpha'])

    # Statistics
    if utils.all_sample_info_provided(sample_fields):
        x_bar_1 = float(sample_fields[0]['mean'])
        s_1 = float(sample_fields[0]['stdDev'])
        x_bar_2 = float(sample_fields[1]['mean'])
        s_2 = float(sample_fields[1]['stdDev'])
        d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
        t_stat = calculate_t_stat_from_means(
            x_bar_1=x_bar_1, s_1=s_1, n_1=n_1,
            x_bar_2=x_bar_2, s_2=s_2, n_2=n_2)
        results['formulae'] = create_t_stat_from_means_formula(
            x_bar_1=x_bar_1, s_1=s_1, n_1=n_1,
            x_bar_2=x_bar_2, s_2=s_2, n_2=n_2)
    else:
        # Only an effect size was given: use unit-variance groups d apart.
        d = float(inputs['effectSize'])
        x_bar_1, x_bar_2 = 1, 1 + d
        s_1, s_2 = 1, 1
        t_stat = calculate_t_stat_from_cohens_d(d=d, n_1=n_1, n_2=n_2)
        results['formulae'] = create_t_stat_from_d_formula(d=d, n_1=n_1, n_2=n_2)

    # Format Stats
    welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
    t_critical_os = t.ppf(1 - alpha, df=welches_df)
    t_critical_ts = t.ppf(1 - alpha / 2, df=welches_df)
    results['statistics'] = [
        [t_stat, t_critical_os],
        [t_stat, t_critical_ts],
    ]

    # Notes
    results['notes'] = generate_t_stat_notes(n_1, n_2, d, t_stat)
    results['chartText'] = generate_test_distribution_text(
        alpha=alpha, n_1=n_1, n_2=n_2, df=int(welches_df))

    # Charts
    charts = results['charts']
    charts['chartOne'] = generate_t_statistic_vs_effect_size_chart_data(
        n_1=n_1, n_2=n_2, x_bar_1=x_bar_1, x_bar_2=x_bar_2,
        s_1=s_1, s_2=s_2, alpha=alpha)
    charts['chartTwo'] = generate_t_statistic_vs_sample_size_chart_data(
        n_1=n_1, n_2=n_2, x_bar_1=x_bar_1, x_bar_2=x_bar_2,
        s_1=s_1, s_2=s_2, alpha=alpha)
    charts['chartThree'] = generate_t_distribution_chart_data(
        alpha=alpha, t_stat=t_stat, n_1=n_1, n_2=n_2,
        x_bar_1=x_bar_1, x_bar_2=x_bar_2, s_1=s_1, s_2=s_2)

    # Labels
    results['labels'] = {
        "columns": ["Test Type", "t-statistic", 't-critical'],
        "rows": ["One-sided test", "Two-sided test"],
    }


def _populate_p_value_results(inputs, sample_fields, results):
    """Fill ``results`` in place for the "p-value" target."""
    n_1 = int(sample_fields[0]['n'])
    n_2 = int(sample_fields[1]['n'])
    alpha = float(inputs['alpha'])

    # Statistics
    if utils.all_sample_info_provided(sample_fields):
        x_bar_1 = float(sample_fields[0]['mean'])
        s_1 = float(sample_fields[0]['stdDev'])
        x_bar_2 = float(sample_fields[1]['mean'])
        s_2 = float(sample_fields[1]['stdDev'])
        d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
        welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
        t_stat = calculate_t_stat_from_means(
            x_bar_1=x_bar_1, s_1=s_1, n_1=n_1,
            x_bar_2=x_bar_2, s_2=s_2, n_2=n_2)
        results['statistics'] = calculate_p_value(t_stat=t_stat, df=welches_df)
        results['formulae'] = create_p_value_from_means_formula(
            x_bar_1=x_bar_1, s_1=s_1, n_1=n_1,
            x_bar_2=x_bar_2, s_2=s_2, n_2=n_2)
    else:
        # Only an effect size was given: use unit-variance groups d apart.
        d = float(inputs['effectSize'])
        x_bar_1, x_bar_2 = 1, 1 + d
        s_1, s_2 = 1, 1
        t_stat = calculate_t_stat_from_cohens_d(d=d, n_1=n_1, n_2=n_2)
        # The p-value here uses n_1 + n_2 - 2 degrees of freedom, while the
        # chart text below uses Welch's degrees of freedom.
        results['statistics'] = calculate_p_value(t_stat=t_stat,
                                                  df=(n_1 + n_2 - 2))
        results['formulae'] = create_p_value_from_d_formula(d=d, n_1=n_1, n_2=n_2)
        welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)

    # Notes
    results['notes'] = generate_p_value_notes(
        n_1=n_1, n_2=n_2, d=d,
        p_one_sided=results['statistics'][0][0],
        p_two_sided=results['statistics'][1][0],
        t_stat=t_stat)
    results['chartText'] = generate_test_distribution_text(
        alpha=alpha, n_1=n_1, n_2=n_2, df=int(welches_df))

    # Charts
    charts = results['charts']
    charts['chartOne'] = generate_p_value_vs_effect_size_chart_data(
        n_1=n_1, n_2=n_2, x_bar_1=x_bar_1, x_bar_2=x_bar_2,
        s_1=s_1, s_2=s_2, alpha=alpha)
    charts['chartTwo'] = generate_p_value_vs_sample_size_chart_data(
        n_1=n_1, n_2=n_2, x_bar_1=x_bar_1, x_bar_2=x_bar_2,
        s_1=s_1, s_2=s_2, alpha=alpha)
    charts['chartThree'] = generate_t_distribution_chart_data(
        alpha=alpha, t_stat=t_stat, n_1=n_1, n_2=n_2,
        x_bar_1=x_bar_1, x_bar_2=x_bar_2, s_1=s_1, s_2=s_2)

    # Labels
    results['labels'] = {
        "columns": ["Test Type", "p-value"],
        "rows": ["One-sided test", "Two-sided test"],
    }


def run_model(inputs):
    """Run the calculation selected by ``inputs['target']``.

    Supported targets are "sample-size", "power", "min-effect", "t-stat" and
    "p-value". Each one fills the result with ``statistics``, ``formulae``,
    ``notes``, ``chartText``, ``labels`` and three entries under ``charts``
    (``chartOne``, ``chartTwo``, ``chartThree``).

    Args:
        inputs: mapping that holds at least ``sampleFields`` and ``target``,
            plus the target-specific fields read by the helpers (``alpha``,
            ``power``, ``enrolmentRatio``, ``effectSize``).

    Returns:
        The results dict. For an unrecognised target it contains only an
        empty ``charts`` dict.

    Raises:
        KeyError: if a field required by the selected target is missing.
    """
    sample_fields = inputs['sampleFields']
    results = {"charts": {}}

    target = inputs['target']
    if target == "sample-size":
        _populate_sample_size_results(inputs, sample_fields, results)
    elif target == "power":
        _populate_power_results(inputs, sample_fields, results)
    elif target == "min-effect":
        _populate_min_effect_results(inputs, sample_fields, results)
    elif target == "t-stat":
        _populate_t_stat_results(inputs, sample_fields, results)
    elif target == "p-value":
        _populate_p_value_results(inputs, sample_fields, results)

    return results
def generate_p_value_vs_effect_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha):
    """Build the "p-value vs Effect Size" chart payload.

    One- and two-sided p-values are computed over a grid of about 500 effect
    sizes spanning the observed effect size and a target effect size (the
    minimum effect at power 0.5, divided by the d adjustment). Each curve is
    split at the target into a "lower" and a "higher" series so the two
    segments can be coloured differently. A vertical line marks the grid point
    closest to the observed effect size.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset``.
    """
    def build_series(label, data, palette_index):
        # Every series shares the same point styling; only the label, the
        # data and the palette entry vary.
        return {
            "label": label,
            "data": data,
            "pointBorderWidth": 0,
            "pointRadius": 0.5,
            "borderColor": colors.line_colors[palette_index],
            "backgroundColor": colors.background_colors[palette_index]
        }

    d_raw = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
    d_adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2)
    d_results = tt.calculate_min_effect_size(n_1=n_1, n_2=n_2, alpha=alpha, power=0.5)
    d_target = d_results[1][0] / d_adjustment

    # Pad the plotted range by 50% beyond the observed and target effects.
    ff = 0.5
    if d_raw < 0:
        x_min = min(-d_target, d_raw) * (1 + ff)
        x_max = max(-d_target, d_raw) * (1 - ff)
    else:
        x_min = min(d_target, d_raw) * (1 - ff)
        x_max = max(d_target, d_raw) * (1 + ff)

    # Rounding to ensure the grid values match their formatted axis labels.
    step = (x_max - x_min) / 500
    dps = utils.determine_decimal_points(x_max)
    effect_sizes = [round(x, dps) for x in np.arange(x_min, x_max, step)]
    d_actual = utils.find_closest_value(effect_sizes, d_raw)
    d_target = utils.find_closest_value(effect_sizes, d_target)

    # The degrees of freedom do not depend on the swept effect size, so they
    # are computed once rather than on every iteration.
    welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)

    os_lower, ts_lower = [], []
    os_higher, ts_higher = [], []
    for d in effect_sizes:
        t_stat = tt.calculate_t_stat_from_cohens_d(d, n_1, n_2) * d_adjustment
        results = tt.calculate_p_value(t_stat, welches_df)
        # The point at d_target belongs to both segments so they join up.
        if d <= d_target:
            os_lower.append(results[0][0])
            ts_lower.append(results[1][0])
        else:
            os_lower.append(None)
            ts_lower.append(None)
        if d >= d_target:
            os_higher.append(results[0][0])
            ts_higher.append(results[1][0])
        else:
            os_higher.append(None)
            ts_higher.append(None)

    # Determine X axis labels
    format_string = "{:." + str(dps) + "f}"
    x_axis_values = [format_string.format(x) for x in effect_sizes]

    return {
        "title": "p-value vs Effect Size (sample size: {}, enrolment ratio: {:.3f})".format(n_1 + n_2, n_1/n_2),
        "xAxisLabel": "Effect Size (d)",
        "yAxisLabel": "p-value",
        "labels": x_axis_values,
        "verticalLine": {
            "position": format_string.format(d_actual),
            "label": "Effect Size: " + format_string.format(d_raw)
        },
        "dataset": [
            build_series("One Sided Test", os_lower, 0),
            build_series("One Sided Test", os_higher, 1),
            build_series("Two Sided Test", ts_lower, 0),
            build_series("Two Sided Test", ts_higher, 1)
        ]
    }
def generate_p_value_vs_sample_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha):
    """Build the "Sample Size vs p-value" chart payload.

    One- and two-sided p-values are computed over a range of total sample
    sizes, split between the groups at the observed enrolment ratio. The range
    spans the observed total and the total required for power 0.5, padded by
    10% and floored at 4. Each curve is split at that required total into a
    "lower" and a "higher" series. A vertical line marks the grid point closest
    to the observed total.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset``.
    """
    def build_series(label, data, palette_index):
        # Shared point styling; only label, data and palette entry vary.
        return {
            "label": label,
            "data": data,
            "pointBorderWidth": 0,
            "pointRadius": 0.5,
            "borderColor": colors.line_colors[palette_index],
            "backgroundColor": colors.background_colors[palette_index]
        }

    observed_d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
    adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2)
    total_n = n_1 + n_2
    ratio = n_1 / n_2

    half_power_sizes = tt.calculate_sample_size_from_means(
        mu_1=x_bar_1, mu_2=x_bar_2, sigma_1=s_1, sigma_2=s_2,
        alpha=alpha, power=0.5, enrolment_ratio=ratio)
    split_n = half_power_sizes[0][1] + half_power_sizes[1][1]

    # Plot window: 10% padding either side, at least 4 samples in total.
    margin = 0.1
    lowest = int(max(4, min(total_n * (1 - margin), split_n * (1 - margin))))
    highest = int(max(total_n * (1 + margin), split_n * (1 + margin)))
    stride = int(max(1, (highest - lowest) / 500))
    sample_sizes = np.arange(lowest, highest, stride)

    # Snap both markers onto the grid so they line up with the axis labels.
    marker_n = utils.find_closest_value(sample_sizes, total_n)
    split_n = utils.find_closest_value(sample_sizes, split_n)

    os_lower, ts_lower = [], []
    os_higher, ts_higher = [], []
    for n in sample_sizes:
        group_1 = math.ceil(n * ratio / (1 + ratio))
        group_2 = math.ceil(n - group_1)
        welches_df = utils.welches_degrees_of_freedom(s_1, group_1, s_2, group_2)
        d = utils.calculate_cohens_d(x_bar_1, s_1, group_1, x_bar_2, s_2, group_2)
        t_stat = tt.calculate_t_stat_from_cohens_d(d, group_1, group_2) * adjustment
        p_values = tt.calculate_p_value(t_stat, welches_df)

        # The point at split_n belongs to both segments so they join up.
        os_lower.append(p_values[0][0] if n <= split_n else None)
        ts_lower.append(p_values[1][0] if n <= split_n else None)
        os_higher.append(p_values[0][0] if n >= split_n else None)
        ts_higher.append(p_values[1][0] if n >= split_n else None)

    # Determine X axis labels
    tick_labels = [str(size) for size in sample_sizes]

    return {
        "title": "Sample Size vs p-value (effect size: {:.3f}, enrolment ratio: {:.3f})".format(observed_d, n_1/n_2),
        "xAxisLabel": "Total Samples",
        "yAxisLabel": "p-value",
        "labels": tick_labels,
        "verticalLine": {
            "position": str(marker_n),
            "label": "Sample Size: {}".format(total_n)
        },
        "dataset": [
            build_series("One Sided Test", os_lower, 0),
            build_series("One Sided Test", os_higher, 1),
            build_series("Two Sided Test", ts_lower, 0),
            build_series("Two Sided Test", ts_higher, 1)
        ]
    }
def generate_sampling_distributions_chart_data(mu_1, mu_2, sigma_1, sigma_2, n_1, n_2, alpha):
    """Build the central (H0) and noncentral (HA) t distribution chart payload.

    Both densities are evaluated on a 1000-point grid that extends 4 units
    beyond zero and the non-centrality parameter. The H0 mean, the
    non-centrality parameter and both two-sided critical values are inserted
    into the grid if not already present. The title reports the power, taken
    from the HA tail beyond the critical value on the side of the alternative.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine``, ``hidePoints`` and ``dataset``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    H0_mean = 0
    HA_mean = mu_2 - mu_1

    d = utils.calculate_cohens_d(mu_1=mu_1, sigma_1=sigma_1, n_1=n_1,
                                 mu_2=mu_2, sigma_2=sigma_2, n_2=n_2)
    nc = d * (2 / (1/n_1 + 1/n_2) / 2)**0.5
    if mu_1 > mu_2:
        # Flip the sign so HA sits on the same side of zero as mu_2 - mu_1.
        nc *= -1

    # Determine X axis range
    x_min = min(H0_mean, nc) - 4
    x_max = max(H0_mean, nc) + 4
    x_axis_values = list(np.linspace(start=x_min, stop=x_max, num=1000, endpoint=True))

    alpha_lower = t.ppf(q=alpha/2, df=df)
    alpha_upper = -1 * alpha_lower

    # Insert key values so they exist as exact grid points.
    for val in [H0_mean, nc, alpha_lower, alpha_upper]:
        if val not in x_axis_values:
            bisect.insort(x_axis_values, val)

    H0_significant = []
    H0_not_significant = []
    HA_powered = []
    HA_unpowered = []

    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    for value in x_axis_values:
        # Each density is evaluated once per grid point and reused below.
        h0_density = t.pdf(x=value, df=df)
        ha_density = nct_dist.pdf(x=value)

        # Null Hypothesis: full curve, plus the rejection region only.
        H0_not_significant.append(h0_density)
        if value < alpha_lower or value > alpha_upper:
            H0_significant.append(h0_density)
        else:
            H0_significant.append(None)

        # Alternative Hypothesis: full curve, plus the part that does not
        # reach the critical value on the side of the alternative.
        HA_powered.append(ha_density)
        if HA_mean < H0_mean and value > alpha_lower:
            HA_unpowered.append(ha_density)
        elif HA_mean >= H0_mean and value < alpha_upper:
            HA_unpowered.append(ha_density)
        else:
            HA_unpowered.append(None)

    # Power and the marked critical value follow the side of the alternative.
    if HA_mean < H0_mean:
        power = nct_dist.cdf(x=alpha_lower)
        threshold = alpha_lower
    else:
        power = 1 - nct_dist.cdf(x=alpha_upper)
        threshold = alpha_upper

    decimal_points = utils.determine_decimal_points(x_max)
    format_string = "{:." + str(decimal_points) + "f}"

    return {
        "title": "Central and Noncentral Distributions (effect size: {:0.3f}, α: {:0.3f}, power (1 - β): {:.1%})".format(d, alpha, power),
        "xAxisLabel": "t statistic",
        "yAxisLabel": "Density",
        "labels": [format_string.format(x) for x in x_axis_values],
        "verticalLine": {
            "position": format_string.format(utils.find_closest_value(x_axis_values, threshold)),
            "label": "t crit: " + format_string.format(threshold)
        },
        "hidePoints": True,
        "dataset": [
            {
                "label": "H0 - Significant",
                "data": H0_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            },
            {
                "label": "H0",
                "data": H0_not_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": None
            },
            {
                "label": "HA - Powered",
                "data": HA_powered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": None
            },
            {
                "label": "HA",
                "data": HA_unpowered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": colors.background_colors[1]
            }
        ]
    }