예제 #1
0
def generate_t_distribution_chart_data(alpha, t_stat, n_1, n_2, x_bar_1, x_bar_2, s_1, s_2):
    """Build the chart payload showing the t distribution under H0.

    The central t density is evaluated on a fixed grid starting at -5 and
    stepping by ``(5 - (-5)) / 501`` up to (but excluding) 5. Two series are
    produced: the full density curve, and a copy that is ``None`` between the
    two-sided critical values so that only the rejection tails get a fill.

    Args:
        alpha: Significance level; the critical values are the
            ``1 - alpha / 2`` quantile and its negation.
        t_stat: Observed t statistic, marked with a vertical line at the
            closest grid point.
        n_1, n_2: Sample sizes.
        x_bar_1, x_bar_2: Sample means.
        s_1, s_2: Sample standard deviations.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset``.
    """
    df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
    effect_size = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
    upper_crit = t.ppf(1 - alpha/2, df=df)
    lower_crit = -upper_crit

    # Fixed grid of t values for the X axis.
    lo, hi = -5, 5
    grid = list(np.arange(lo, hi, (hi - lo) / 501))
    tick_labels = ["{:.3f}".format(point) for point in grid]

    # Full H0 density, then the same values masked inside the acceptance region.
    null_density = [t.pdf(point, df=df) for point in grid]
    rejection_density = [
        None if lower_crit <= point <= upper_crit else density
        for point, density in zip(grid, null_density)
    ]

    return {
        "title": "t-statistic distribution (effect size: {:.3f})".format(effect_size),
        "xAxisLabel": "t",
        "yAxisLabel": "Density",
        "labels": tick_labels,
        "verticalLine": {
            "position": "{:.3f}".format(utils.find_closest_value(grid, t_stat)),
            "label": "t statistic: {:.3f}".format(t_stat)
        },
        "dataset": [
            {
                "label": "H0",
                "data": null_density,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": None
            },
            {
                "label": "H0",
                "data": rejection_density,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            }
        ]
    }
예제 #2
0
def calculate_power_from_means(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Compute one- and two-sided statistical power from group summaries.

    Power is taken as the upper-tail probability of a noncentral t
    distribution beyond the central-t critical value. Because only the upper
    tail is evaluated, the noncentrality parameter is built from the
    magnitude of the effect size, matching ``create_power_from_means_formula``.

    Args:
        mu_1, mu_2: Group means.
        sigma_1, sigma_2: Group standard deviations.
        n_1, n_2: Group sample sizes.
        alpha: Significance level.

    Returns:
        ``[[power_one_sided], [power_two_sided]]``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    t_crit_os = t.ppf(q=1 - alpha, df=df)
    t_crit_ts = t.ppf(q=1 - alpha / 2, df=df)

    # Noncentral t distribution under the alternative hypothesis.
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    # abs(): the direction of the difference must not change the power,
    # since only the upper tail is integrated below.
    nc = abs(d) * (2 / (1 / n_1 + 1 / n_2) / 2)**0.5
    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)

    power_os = 1 - nct_dist.cdf(x=t_crit_os)
    power_ts = 1 - nct_dist.cdf(x=t_crit_ts)

    return [[power_os], [power_ts]]
예제 #3
0
def create_power_from_means_formula(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2, alpha):
    """Return the LaTeX steps that explain the two-sided power calculation.

    Four formula strings are produced, in order: the critical t value, the
    definition of beta in terms of a noncentral t distribution, the numeric
    value of beta, and the resulting power ``1 - beta``.

    Args:
        mu_1, mu_2: Group means.
        sigma_1, sigma_2: Group standard deviations.
        n_1, n_2: Group sample sizes.
        alpha: Significance level.

    Returns:
        list[str]: The four formatted LaTeX strings.
    """
    formulae = []
    # Degrees of freedom are truncated to an int here, for both the
    # displayed value and the quantile/cdf calculations below.
    df = int(utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2))
    sig = 1 - alpha/2
    t_crit = t.ppf(q=sig, df=df)
    # The LaTeX spacing command "\ " is written as "\\ " so the literal does
    # not rely on an invalid escape sequence; the resulting text is unchanged.
    step_1 = "t_{{crit}} = t_{{1-\\alpha/2, \\ \\upsilon}} = t_{{{:.3f}, \\ {}}} = {:.3f}"
    formulae.append(step_1.format(sig, df, t_crit))

    step_2 = "\\beta = P(T <= t_{{crit}})\\ where\\ T\\ \\sim\\ t_{{\\upsilon={},\\ \\mu={:.3f}}}"
    d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2, n_2)
    nc = abs(d) * (2 / (1/n_1 + 1/n_2) / 2)**0.5
    formulae.append(step_2.format(df, nc))

    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    beta = nct_dist.cdf(x=t_crit)
    step_3 = "\\beta = P(T <= {:.3f}) = {:.3f}"
    formulae.append(step_3.format(t_crit, beta))

    step_4 = "1 - \\beta = 1 - {:.3f} = {:.3f}"
    formulae.append(step_4.format(beta, 1 - beta))

    return formulae
예제 #4
0
def run_model(inputs):
    """Run the calculation selected by ``inputs['target']`` and collect results.

    Supported targets are ``"sample-size"``, ``"power"``, ``"min-effect"``,
    ``"t-stat"`` and ``"p-value"``. Each branch reads its parameters from
    ``inputs`` (``alpha``, and depending on the target ``power``,
    ``enrolmentRatio``, ``effectSize``) and from ``inputs['sampleFields']``,
    whose first two entries are read for ``'mean'``, ``'stdDev'`` and ``'n'``.

    When ``utils.all_sample_info_provided`` is falsy, the branch falls back to
    the supplied effect size and substitutes placeholder means and unit
    standard deviations so the chart helpers can still be called.

    Args:
        inputs: Mapping holding the request parameters described above.

    Returns:
        dict: Always contains ``'charts'``. For a recognised target it also
        contains ``'statistics'``, ``'formulae'``, ``'notes'``,
        ``'chartText'`` and ``'labels'``, and ``'charts'`` holds
        ``'chartOne'``, ``'chartTwo'`` and ``'chartThree'``. For any other
        target the result is just ``{"charts": {}}``.
    """
    sample_fields = inputs['sampleFields']
    results = {"charts": {}}
    # Effect size; stays None until a branch supplies or derives it.
    d = None

    # TARGET: SAMPLE SIZE
    if inputs['target'] == "sample-size":
        alpha = float(inputs['alpha'])
        power = float(inputs['power'])
        enrolment_ratio = float(inputs['enrolmentRatio'])

        # Stats and Formulas
        if utils.all_sample_info_provided(sample_fields):
            mu_1 = float(sample_fields[0]['mean'])
            mu_2 = float(sample_fields[1]['mean'])
            sigma_1 = float(sample_fields[0]['stdDev'])
            sigma_2 = float(sample_fields[1]['stdDev'])
            results['statistics'] = calculate_sample_size_from_means(
                mu_1=mu_1,
                mu_2=mu_2,
                sigma_1=sigma_1,
                sigma_2=sigma_2,
                alpha=alpha,
                power=power,
                enrolment_ratio=enrolment_ratio)
            results['formulae'] = create_sample_size_from_means_formula(
                mu_1=mu_1,
                mu_2=mu_2,
                sigma_1=sigma_1,
                sigma_2=sigma_2,
                alpha=alpha,
                power=power,
                enrolment_ratio=enrolment_ratio)
        else:
            # No full sample info: use the given effect size with placeholder
            # means (0 and d) and unit standard deviations.
            d = float(inputs['effectSize'])
            mu_1 = 0
            mu_2 = mu_1 + d
            sigma_1, sigma_2 = 1, 1
            results['statistics'] = calculate_sample_size_from_cohens_d(
                d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
            results['formulae'] = create_sample_size_from_d_formula(
                d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)

        # Calculate vars
        # Column 1 of the first two statistics rows is used as n_1 / n_2
        # (presumably the two-sided column, per the labels below — confirm).
        n_1 = results['statistics'][0][1]
        n_2 = results['statistics'][1][1]
        # d is still None only when the means/std-devs path was taken above.
        if d is None:
            d = utils.calculate_cohens_d(mu_1=mu_1,
                                         sigma_1=sigma_1,
                                         n_1=n_1,
                                         mu_2=mu_2,
                                         sigma_2=sigma_2,
                                         n_2=n_2)
        # NOTE(review): pooled_sd is not read again in this function.
        pooled_sd = utils.calculate_pooled_standard_deviation(
            n_1, n_2, sigma_1, sigma_2)

        # Notes
        results['notes'] = generate_sample_size_notes(alpha, power)
        results['chartText'] = generate_power_distributions_text(d=d,
                                                                 mu_1=mu_1,
                                                                 n_1=n_1,
                                                                 mu_2=mu_2,
                                                                 n_2=n_2,
                                                                 alpha=alpha,
                                                                 power=power)

        # Charts
        results['charts'][
            'chartOne'] = generate_power_vs_sample_size_chart_data(
                d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
        results['charts'][
            'chartTwo'] = generate_effect_size_vs_sample_size_chart_data(
                d=d, alpha=alpha, power=power, enrolment_ratio=enrolment_ratio)
        results['charts'][
            'chartThree'] = generate_sampling_distributions_chart_data(
                mu_1=mu_1,
                mu_2=mu_2,
                sigma_1=sigma_1,
                sigma_2=sigma_2,
                n_1=n_1,
                n_2=n_2,
                alpha=alpha)

        # Labels
        results['labels'] = {
            "columns": ["", "One-sided test", "Two-sided test"],
            "rows": [
                "Sample 1 (n<sub>1</sub>)", "Sample 2 (n<sub>2</sub>)",
                "All Samples (n<sub>1</sub> + n<sub>2</sub>)"
            ],
        }

    # TARGET: POWER
    elif inputs['target'] == "power":
        n_1 = int(sample_fields[0]['n'])
        n_2 = int(sample_fields[1]['n'])
        alpha = float(inputs['alpha'])

        # Statistics and Formulas
        if utils.all_sample_info_provided(sample_fields):
            mu_1 = float(sample_fields[0]['mean'])
            mu_2 = float(sample_fields[1]['mean'])
            sigma_1 = float(sample_fields[0]['stdDev'])
            sigma_2 = float(sample_fields[1]['stdDev'])
            d = utils.calculate_cohens_d(mu_1, sigma_1, n_1, mu_2, sigma_2,
                                         n_2)
            results['statistics'] = calculate_power_from_means(mu_1=mu_1,
                                                               sigma_1=sigma_1,
                                                               n_1=n_1,
                                                               mu_2=mu_2,
                                                               sigma_2=sigma_2,
                                                               n_2=n_2,
                                                               alpha=alpha)
            results['formulae'] = create_power_from_means_formula(
                mu_1=mu_1,
                sigma_1=sigma_1,
                n_1=n_1,
                mu_2=mu_2,
                sigma_2=sigma_2,
                n_2=n_2,
                alpha=alpha)
        else:
            # No full sample info: use the given effect size with placeholder
            # means (0 and d) and unit standard deviations.
            d = float(inputs['effectSize'])
            mu_1 = 0
            mu_2 = mu_1 + d
            sigma_1, sigma_2 = 1, 1
            results['statistics'] = calculate_power_from_cohens_d(d=d,
                                                                  n_1=n_1,
                                                                  n_2=n_2,
                                                                  alpha=alpha)
            results['formulae'] = create_power_from_d_formula(d=d,
                                                              n_1=n_1,
                                                              n_2=n_2,
                                                              alpha=alpha)

        # Calculate vars
        # Second statistics row is labelled "Two-sided test" below.
        power = results['statistics'][1][0]
        # NOTE(review): both sub-branches above assign d, so this fallback
        # cannot run in the power branch.
        if d is None:
            d = utils.calculate_cohens_d(mu_1=mu_1,
                                         sigma_1=sigma_1,
                                         n_1=n_1,
                                         mu_2=mu_2,
                                         sigma_2=sigma_2,
                                         n_2=n_2)
        # NOTE(review): pooled_sd is not read again in this function.
        pooled_sd = utils.calculate_pooled_standard_deviation(
            n_1, n_2, sigma_1, sigma_2)

        # Notes
        welches_df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2,
                                                      n_2)
        results['notes'] = generate_power_notes(alpha=alpha, df=welches_df)
        results['chartText'] = generate_power_distributions_text(d=d,
                                                                 mu_1=mu_1,
                                                                 n_1=n_1,
                                                                 mu_2=mu_2,
                                                                 n_2=n_2,
                                                                 alpha=alpha,
                                                                 power=power)

        # Charts
        results['charts'][
            'chartOne'] = generate_sample_size_vs_power_chart_data(d=d,
                                                                   alpha=alpha,
                                                                   power=power,
                                                                   n_1=n_1,
                                                                   n_2=n_2)
        results['charts'][
            'chartTwo'] = generate_effect_size_vs_power_chart_data(d=d,
                                                                   alpha=alpha,
                                                                   n_1=n_1,
                                                                   n_2=n_2)
        results['charts'][
            'chartThree'] = generate_sampling_distributions_chart_data(
                mu_1=mu_1,
                mu_2=mu_2,
                sigma_1=sigma_1,
                sigma_2=sigma_2,
                n_1=n_1,
                n_2=n_2,
                alpha=alpha)

        # Labels
        results['labels'] = {
            "columns": ["Test type", "Statistical Power (1 - β)"],
            "rows": ["One-sided test", "Two-sided test"],
        }

    # TARGET: MIN EFFECT SIZE
    elif inputs['target'] == "min-effect":
        n_1 = int(sample_fields[0]['n'])
        n_2 = int(sample_fields[1]['n'])
        alpha = float(inputs['alpha'])
        power = float(inputs['power'])
        results['statistics'] = calculate_min_effect_size(n_1=n_1,
                                                          n_2=n_2,
                                                          alpha=alpha,
                                                          power=power)
        results['formulae'] = create_min_effect_size_formula(n_1=n_1,
                                                             n_2=n_2,
                                                             alpha=alpha,
                                                             power=power)

        # Calculate Vars
        # Second statistics row is labelled "Two-sided test" below; its value
        # becomes the effect size used for the notes and charts, with
        # placeholder means (0 and d) and unit standard deviations.
        d = results['statistics'][1][0]
        mu_1 = 0
        mu_2 = mu_1 + d
        sigma_1, sigma_2 = 1, 1
        # NOTE(review): pooled_sd is not read again in this function.
        pooled_sd = utils.calculate_pooled_standard_deviation(
            n_1, n_2, sigma_1, sigma_2)

        # Notes
        welches_df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2,
                                                      n_2)
        results['notes'] = generate_min_effect_size_notes(alpha=alpha,
                                                          power=power,
                                                          df=welches_df)
        results['chartText'] = generate_power_distributions_text(d=d,
                                                                 mu_1=mu_1,
                                                                 n_1=n_1,
                                                                 mu_2=mu_2,
                                                                 n_2=n_2,
                                                                 alpha=alpha,
                                                                 power=power)

        # Charts
        results['charts'][
            'chartOne'] = generate_sample_size_vs_effect_size_data(d=d,
                                                                   alpha=alpha,
                                                                   power=power,
                                                                   n_1=n_1,
                                                                   n_2=n_2)
        results['charts']['chartTwo'] = generate_power_vs_effect_size_data(
            d=d, alpha=alpha, power=power, n_1=n_1, n_2=n_2)
        results['charts'][
            'chartThree'] = generate_sampling_distributions_chart_data(
                mu_1=mu_1,
                mu_2=mu_2,
                sigma_1=sigma_1,
                sigma_2=sigma_2,
                n_1=n_1,
                n_2=n_2,
                alpha=alpha)

        # Labels
        results['labels'] = {
            "columns": ["Test type", "Minimum effect size"],
            "rows": ["One-sided test", "Two-sided test"],
        }

    # TARGET: T-STATISTIC
    elif inputs['target'] == "t-stat":
        n_1 = int(sample_fields[0]['n'])
        n_2 = int(sample_fields[1]['n'])
        alpha = float(inputs['alpha'])

        # Statistics
        if utils.all_sample_info_provided(sample_fields):
            x_bar_1 = float(sample_fields[0]['mean'])
            s_1 = float(sample_fields[0]['stdDev'])
            x_bar_2 = float(sample_fields[1]['mean'])
            s_2 = float(sample_fields[1]['stdDev'])
            d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
            t_stat = calculate_t_stat_from_means(x_bar_1=x_bar_1,
                                                 s_1=s_1,
                                                 n_1=n_1,
                                                 x_bar_2=x_bar_2,
                                                 s_2=s_2,
                                                 n_2=n_2)
            results['formulae'] = create_t_stat_from_means_formula(
                x_bar_1=x_bar_1,
                s_1=s_1,
                n_1=n_1,
                x_bar_2=x_bar_2,
                s_2=s_2,
                n_2=n_2)
        else:
            # No full sample info: use the given effect size with placeholder
            # means (1 and 1 + d) and unit standard deviations.
            d = float(inputs['effectSize'])
            x_bar_1, x_bar_2 = 1, 1 + d
            s_1, s_2 = 1, 1
            t_stat = calculate_t_stat_from_cohens_d(d=d, n_1=n_1, n_2=n_2)
            results['formulae'] = create_t_stat_from_d_formula(d=d,
                                                               n_1=n_1,
                                                               n_2=n_2)

        # Format Stats
        # One row per test type: [t statistic, critical t].
        welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
        t_critical_os = t.ppf(1 - alpha, df=welches_df)
        t_critical_ts = t.ppf(1 - alpha / 2, df=welches_df)
        results['statistics'] = [
            [t_stat, t_critical_os],
            [t_stat, t_critical_ts],
        ]

        # Notes
        results['notes'] = generate_t_stat_notes(n_1, n_2, d, t_stat)
        results['chartText'] = generate_test_distribution_text(
            alpha=alpha, n_1=n_1, n_2=n_2, df=int(welches_df))

        # Charts
        # Reads back the same t_stat that was stored in the statistics above.
        t_stat = results['statistics'][0][0]
        results['charts'][
            'chartOne'] = generate_t_statistic_vs_effect_size_chart_data(
                n_1=n_1,
                n_2=n_2,
                x_bar_1=x_bar_1,
                x_bar_2=x_bar_2,
                s_1=s_1,
                s_2=s_2,
                alpha=alpha)
        results['charts'][
            'chartTwo'] = generate_t_statistic_vs_sample_size_chart_data(
                n_1=n_1,
                n_2=n_2,
                x_bar_1=x_bar_1,
                x_bar_2=x_bar_2,
                s_1=s_1,
                s_2=s_2,
                alpha=alpha)
        results['charts']['chartThree'] = generate_t_distribution_chart_data(
            alpha=alpha,
            t_stat=t_stat,
            n_1=n_1,
            n_2=n_2,
            x_bar_1=x_bar_1,
            x_bar_2=x_bar_2,
            s_1=s_1,
            s_2=s_2)

        # Labels
        results['labels'] = {
            "columns": ["Test Type", "t-statistic", 't-critical'],
            "rows": ["One-sided test", "Two-sided test"],
        }

    # TARGET: P-VALUE
    elif inputs['target'] == "p-value":
        n_1 = int(sample_fields[0]['n'])
        n_2 = int(sample_fields[1]['n'])
        alpha = float(inputs['alpha'])

        # Statistics
        if utils.all_sample_info_provided(sample_fields):
            x_bar_1 = float(sample_fields[0]['mean'])
            s_1 = float(sample_fields[0]['stdDev'])
            x_bar_2 = float(sample_fields[1]['mean'])
            s_2 = float(sample_fields[1]['stdDev'])
            d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
            welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
            t_stat = calculate_t_stat_from_means(x_bar_1=x_bar_1,
                                                 s_1=s_1,
                                                 n_1=n_1,
                                                 x_bar_2=x_bar_2,
                                                 s_2=s_2,
                                                 n_2=n_2)
            results['statistics'] = calculate_p_value(t_stat=t_stat,
                                                      df=welches_df)
            results['formulae'] = create_p_value_from_means_formula(
                x_bar_1=x_bar_1,
                s_1=s_1,
                n_1=n_1,
                x_bar_2=x_bar_2,
                s_2=s_2,
                n_2=n_2)
        else:
            # No full sample info: use the given effect size with placeholder
            # means (1 and 1 + d) and unit standard deviations.
            d = float(inputs['effectSize'])
            x_bar_1, x_bar_2 = 1, 1 + d
            s_1, s_2 = 1, 1
            t_stat = calculate_t_stat_from_cohens_d(d=d, n_1=n_1, n_2=n_2)
            # This path passes n_1 + n_2 - 2 as df rather than the value from
            # utils.welches_degrees_of_freedom used in the branch above.
            results['statistics'] = calculate_p_value(t_stat=t_stat,
                                                      df=(n_1 + n_2 - 2))
            results['formulae'] = create_p_value_from_d_formula(d=d,
                                                                n_1=n_1,
                                                                n_2=n_2)

        # Notes
        results['notes'] = generate_p_value_notes(
            n_1=n_1,
            n_2=n_2,
            d=d,
            p_one_sided=results['statistics'][0][0],
            p_two_sided=results['statistics'][1][0],
            t_stat=t_stat)
        # Computed here for both sub-branches (the effect-size path has not
        # set welches_df before this point).
        welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)
        results['chartText'] = generate_test_distribution_text(
            alpha=alpha, n_1=n_1, n_2=n_2, df=int(welches_df))

        # Charts
        results['charts'][
            'chartOne'] = generate_p_value_vs_effect_size_chart_data(
                n_1=n_1,
                n_2=n_2,
                x_bar_1=x_bar_1,
                x_bar_2=x_bar_2,
                s_1=s_1,
                s_2=s_2,
                alpha=alpha)
        results['charts'][
            'chartTwo'] = generate_p_value_vs_sample_size_chart_data(
                n_1=n_1,
                n_2=n_2,
                x_bar_1=x_bar_1,
                x_bar_2=x_bar_2,
                s_1=s_1,
                s_2=s_2,
                alpha=alpha)
        results['charts']['chartThree'] = generate_t_distribution_chart_data(
            alpha=alpha,
            t_stat=t_stat,
            n_1=n_1,
            n_2=n_2,
            x_bar_1=x_bar_1,
            x_bar_2=x_bar_2,
            s_1=s_1,
            s_2=s_2)

        # Labels
        results['labels'] = {
            "columns": ["Test Type", "p-value"],
            "rows": ["One-sided test", "Two-sided test"],
        }

    return results
예제 #5
0
def generate_p_value_vs_effect_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha):
    """Build the chart payload plotting p-value against effect size.

    The X axis is a grid of 500 steps spanning the observed effect size and a
    target effect size (the ``power=0.5`` result of
    ``tt.calculate_min_effect_size`` divided by ``utils.calculate_d_adjustment``),
    widened by 50% at each end. For every grid value the one- and two-sided
    p-values are computed, then split into "lower" (``d <= target``) and
    "higher" (``d >= target``) series so each side can be drawn in its own
    colour; positions outside a series hold ``None``.

    Args:
        n_1, n_2: Sample sizes.
        x_bar_1, x_bar_2: Sample means.
        s_1, s_2: Sample standard deviations.
        alpha: Significance level passed to the minimum-effect-size helper.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset`` (four series).
    """
    d_raw = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
    d_adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2)
    d_results = tt.calculate_min_effect_size(n_1=n_1, n_2=n_2, alpha=alpha, power=0.5)
    d_target = d_results[1][0] / d_adjustment
    ff = 0.5  # fraction by which the axis is extended beyond the key values
    if d_raw < 0:
        x_min = min(-d_target, d_raw) * (1 + ff)
        x_max = max(-d_target, d_raw) * (1 - ff)
    else:
        x_min = min(d_target, d_raw) * (1 - ff)
        x_max = max(d_target, d_raw) * (1 + ff)

    # Rounding to ensure matches between grid values and formatted labels
    step = (x_max - x_min) / 500
    dps = utils.determine_decimal_points(x_max)
    effect_sizes = [round(x, dps) for x in np.arange(x_min, x_max, step)]
    d_actual = utils.find_closest_value(effect_sizes, d_raw)
    # NOTE(review): d_target is not negated here, although the d_raw < 0
    # branch above uses -d_target for the axis range — confirm the split
    # point is intended for negative effect sizes.
    d_target = utils.find_closest_value(effect_sizes, d_target)

    # Degrees of freedom depend only on the fixed inputs, so compute them once
    # rather than on every grid point.
    welches_df = utils.welches_degrees_of_freedom(s_1, n_1, s_2, n_2)

    os_lower = []
    ts_lower = []
    os_higher = []
    ts_higher = []
    for d in effect_sizes:
        t_stat = tt.calculate_t_stat_from_cohens_d(d, n_1, n_2) * d_adjustment
        results = tt.calculate_p_value(t_stat, welches_df)
        if d <= d_target:
            os_lower.append(results[0][0])
            ts_lower.append(results[1][0])
        else:
            os_lower.append(None)
            ts_lower.append(None)
        if d >= d_target:
            os_higher.append(results[0][0])
            ts_higher.append(results[1][0])
        else:
            os_higher.append(None)
            ts_higher.append(None)

    # X axis labels use the same number of decimals as the rounded grid
    format_string = "{:." + str(dps) + "f}"
    x_axis_values = [format_string.format(x) for x in effect_sizes]

    return {
        "title": "p-value vs Effect Size (sample size: {}, enrolment ratio: {:.3f})".format(n_1 + n_2, n_1/n_2),
        "xAxisLabel": "Effect Size (d)",
        "yAxisLabel": "p-value",
        "labels": x_axis_values,
        "verticalLine": {
            "position": format_string.format(d_actual),
            "label": "Effect Size: " + format_string.format(d_raw)
        },
        "dataset": [
            {
                "label": "One Sided Test",
                "data": os_lower,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            },
            {
                "label": "One Sided Test",
                "data": os_higher,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[1],
                "backgroundColor": colors.background_colors[1]
            },
            {
                "label": "Two Sided Test",
                "data": ts_lower,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            },
            {
                "label": "Two Sided Test",
                "data": ts_higher,
                "pointBorderWidth": 0,
                "pointRadius": 0.5,
                "borderColor": colors.line_colors[1],
                "backgroundColor": colors.background_colors[1]
            }
        ]
    }
예제 #6
0
def generate_p_value_vs_sample_size_chart_data(n_1, n_2, x_bar_1, x_bar_2, s_1, s_2, alpha):
    """Build the chart payload plotting p-value against total sample size.

    The X axis runs over total sample sizes around both the actual total
    (``n_1 + n_2``) and a target total taken from
    ``tt.calculate_sample_size_from_means`` at ``power=0.5``, padded by 10% and
    never starting below 4. Each total is split between the two groups using
    the observed enrolment ratio, and the one- and two-sided p-values are
    recomputed for that split. Values are divided into "lower"
    (``n <= target``) and "higher" (``n >= target``) series; positions outside
    a series hold ``None``.

    Args:
        n_1, n_2: Sample sizes.
        x_bar_1, x_bar_2: Sample means.
        s_1, s_2: Sample standard deviations.
        alpha: Significance level passed to the sample-size helper.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine`` and ``dataset`` (four series).
    """
    observed_d = utils.calculate_cohens_d(x_bar_1, s_1, n_1, x_bar_2, s_2, n_2)
    adjustment = utils.calculate_d_adjustment(s_1, n_1, s_2, n_2)
    total_observed = n_1 + n_2
    ratio = n_1 / n_2
    sizing = tt.calculate_sample_size_from_means(mu_1=x_bar_1, mu_2=x_bar_2, sigma_1=s_1, sigma_2=s_2, alpha=alpha, power=0.5, enrolment_ratio=ratio)
    total_target = sizing[0][1] + sizing[1][1]

    # Axis bounds: pad by 10% around whichever of the two totals is extreme.
    margin = 0.1
    lowest = int(max(4, min(total_observed * (1 - margin), total_target * (1 - margin))))
    highest = int(max(total_observed * (1 + margin), total_target * (1 + margin)))
    stride = int(max(1, (highest - lowest) / 500))
    totals = np.arange(lowest, highest, stride)
    marker = utils.find_closest_value(totals, total_observed)
    split_at = utils.find_closest_value(totals, total_target)

    one_sided_below, two_sided_below = [], []
    one_sided_above, two_sided_above = [], []
    for total in totals:
        # Split the total between the groups according to the enrolment ratio.
        group_1 = math.ceil(total * ratio / (1 + ratio))
        group_2 = math.ceil(total - group_1)
        df = utils.welches_degrees_of_freedom(s_1, group_1, s_2, group_2)
        effect = utils.calculate_cohens_d(x_bar_1, s_1, group_1, x_bar_2, s_2, group_2)
        statistic = tt.calculate_t_stat_from_cohens_d(effect, group_1, group_2) * adjustment
        p_values = tt.calculate_p_value(statistic, df)

        on_lower_side = total <= split_at
        on_higher_side = total >= split_at
        one_sided_below.append(p_values[0][0] if on_lower_side else None)
        two_sided_below.append(p_values[1][0] if on_lower_side else None)
        one_sided_above.append(p_values[0][0] if on_higher_side else None)
        two_sided_above.append(p_values[1][0] if on_higher_side else None)

    def _series(label, data, palette):
        # One chart series; `palette` selects the line/background colour pair.
        return {
            "label": label,
            "data": data,
            "pointBorderWidth": 0,
            "pointRadius": 0.5,
            "borderColor": colors.line_colors[palette],
            "backgroundColor": colors.background_colors[palette]
        }

    return {
        "title": "Sample Size vs p-value (effect size: {:.3f}, enrolment ratio: {:.3f})".format(observed_d, n_1/n_2),
        "xAxisLabel": "Total Samples",
        "yAxisLabel": "p-value",
        "labels": [str(total) for total in totals],
        "verticalLine": {
            "position": str(marker),
            "label": "Sample Size: {}".format(total_observed)
        },
        "dataset": [
            _series("One Sided Test", one_sided_below, 0),
            _series("One Sided Test", one_sided_above, 1),
            _series("Two Sided Test", two_sided_below, 0),
            _series("Two Sided Test", two_sided_above, 1),
        ]
    }
예제 #7
0
def generate_sampling_distributions_chart_data(mu_1, mu_2, sigma_1, sigma_2, n_1, n_2, alpha):
    """Build the chart payload comparing the H0 and HA t distributions.

    The central t density (H0) and a noncentral t density (HA) are evaluated
    on 1000 evenly spaced points, plus the exact key values (0, the
    noncentrality parameter and both two-sided critical values) so that the
    shaded regions start and stop exactly on them. Four series are returned:
    the H0 rejection tails, the full H0 curve, the full HA curve, and the
    part of the HA curve that falls short of the critical value on the side
    of the alternative. Positions outside a partial series hold ``None``.

    The noncentrality parameter is negated when ``mu_1 > mu_2``, and power is
    taken from the matching tail of the noncentral distribution.

    Args:
        mu_1, mu_2: Group means.
        sigma_1, sigma_2: Group standard deviations.
        n_1, n_2: Group sample sizes.
        alpha: Two-sided significance level.

    Returns:
        dict with the keys ``title``, ``xAxisLabel``, ``yAxisLabel``,
        ``labels``, ``verticalLine``, ``hidePoints`` and ``dataset``.
    """
    df = utils.welches_degrees_of_freedom(sigma_1, n_1, sigma_2, n_2)
    H0_mean = 0
    HA_mean = mu_2 - mu_1
    d = utils.calculate_cohens_d(mu_1=mu_1, sigma_1=sigma_1, n_1=n_1, mu_2=mu_2, sigma_2=sigma_2, n_2=n_2)
    nc = d * (2 / (1/n_1 + 1/n_2) / 2)**0.5
    if mu_1 > mu_2:
        nc *= -1

    # Determine X axis range: 4 units beyond the centres of both curves
    x_min = min(H0_mean, nc) - 4
    x_max = max(H0_mean, nc) + 4
    x_axis_values = list(np.linspace(start=x_min, stop=x_max, num=1000, endpoint=True))

    alpha_lower = t.ppf(q=alpha/2, df=df)
    alpha_upper = -1 * alpha_lower

    # Insert key values so region boundaries land on actual grid points
    for val in [H0_mean, nc, alpha_lower, alpha_upper]:
        if val not in x_axis_values:
            bisect.insort(x_axis_values, val)

    H0_significant = []
    H0_not_significant = []
    HA_powered = []
    HA_unpowered = []

    nct_dist = utils.initialize_nct_distribution(df=df, nc=nc)
    for value in x_axis_values:
        # Null Hypothesis: full curve, and the tails beyond the critical values
        h0_density = t.pdf(x=value, df=df)
        H0_not_significant.append(h0_density)
        if value < alpha_lower or value > alpha_upper:
            H0_significant.append(h0_density)
        else:
            H0_significant.append(None)

        # Alternative Hypothesis: full curve, and the part that does not
        # reach the critical value on the alternative's side
        ha_density = nct_dist.pdf(x=value)
        HA_powered.append(ha_density)
        if HA_mean < H0_mean and value > alpha_lower:
            HA_unpowered.append(ha_density)
        elif HA_mean >= H0_mean and value < alpha_upper:
            HA_unpowered.append(ha_density)
        else:
            HA_unpowered.append(None)

    # Power and the displayed critical value come from the tail that lies in
    # the direction of the alternative.
    if HA_mean < H0_mean:
        power = nct_dist.cdf(x=alpha_lower)
        threshold = alpha_lower
    else:
        power = 1 - nct_dist.cdf(x=alpha_upper)
        threshold = alpha_upper

    decimal_points = utils.determine_decimal_points(x_max)
    format_string = "{:." + str(decimal_points) + "f}"

    return {
        "title": "Central and Noncentral Distributions (effect size: {:0.3f}, α: {:0.3f}, power (1 - β): {:.1%})".format(d, alpha, power),
        "xAxisLabel": "t statistic",
        "yAxisLabel": "Density",
        "labels": [format_string.format(x) for x in x_axis_values],
        "verticalLine": {
            "position": format_string.format(utils.find_closest_value(x_axis_values, threshold)),
            "label": "t crit: " + format_string.format(threshold)
        },
        "hidePoints": True,
        "dataset": [
            {
                "label": "H0 - Significant",
                "data": H0_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": colors.background_colors[0]
            },
            {
                "label": "H0",
                "data": H0_not_significant,
                "borderColor": colors.line_colors[0],
                "backgroundColor": None
            },
            {
                "label": "HA - Powered",
                "data": HA_powered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": None
            },
            {
                "label": "HA",
                "data": HA_unpowered,
                "borderColor": colors.line_colors[1],
                "backgroundColor": colors.background_colors[1]
            }
        ]
    }