# Module-level context assumed by the functions below (defined elsewhere in
# this package): SELECTION, get_prop_resp, metrics, generate_stat_sentence,
# generate_cat_stat_sentence and AI_FEEDBACK_ACCURACY_SCALAS_REV.
import numbers

import numpy as np
import pandas as pd
import researchpy as rp
import statsmodels.stats.api as sms
from scipy import stats


def get_prop_system_usage(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "prop":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    n = df_prop.shape[0]
    usage_ratio = metrics.get_mean(metrics.get_dss_usage(df_prop))
    unique_calls = metrics.get_mean(metrics.get_dss_average_unique_calls(df_prop))
    # One-sample proportions z-test: H0: usage ratio == 0, Ha: larger.
    pnull = 0
    z_ratio = sms.proportions_ztest(usage_ratio * n, n, pnull, alternative='larger')
    res = {
        "usage_ratio": f"{100 * usage_ratio:.2f} %",
        "mean unique-calls": unique_calls,
        "z-score-usage_ratio": f"{z_ratio[0]:.3f} ({z_ratio[1]:.3f})",
    }
    return {
        k: (f"{v:.3f}" if isinstance(v, numbers.Number) and v != int(v) else v)
        for k, v in res.items()
    }
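# Illustrative sketch (not part of the original analysis): a minimal,
# self-contained version of the one-sample proportions z-test that
# get_prop_system_usage runs, on made-up counts. Only the statsmodels call
# is real; the numbers are hypothetical.
def _demo_usage_ztest():
    from statsmodels.stats.proportion import proportions_ztest
    n_subjects = 120   # hypothetical number of proposers
    n_users = 78       # hypothetical number who used the DSS at least once
    # H0: usage proportion == 0, Ha: proportion > 0 ('larger'), as above.
    z, p = proportions_ztest(n_users, n_subjects, value=0, alternative='larger')
    print(f"z = {z:.3f}, p = {p:.3f}")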
def get_rel_responder_gain2(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "resp":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    # Align the responder columns onto the proposer frame (shared index).
    df_prop[df_resp.columns] = df_resp
    resp_gain = metrics.get_mean(
        metrics.gain_responder(df_prop["min_offer"], df_prop["offer"]))
    resp_gain_dss = metrics.get_mean(
        metrics.gain_responder(df_prop["min_offer_dss"], df_prop["offer_final"]))
    resp_gain_2 = metrics.get_mean(
        metrics.get_rel_gain_responder(df_prop["min_offer"], df_prop["min_offer_dss"],
                                       df_prop["offer"]))
    resp_gain_dss_2 = metrics.get_mean(
        metrics.get_rel_gain_responder(df_prop["min_offer"], df_prop["min_offer_dss"],
                                       df_prop["offer_final"]))
    return {
        "resp_gain": resp_gain,
        "resp_gain_dss": resp_gain_dss,
        "rel_gain vs proposer": resp_gain_2,
        "rel_gain vs proposer + DSS": resp_gain_dss_2,
    }
# Variant: treatment's proposer + DSS vs. the t12a reference treatment.
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp("t12a")
    prop_values = metric(df_resp["min_offer"], df_prop["offer"])
    prop_value = metrics.get_mean(prop_values)
    prop_dss_values = metric(df_resp["min_offer"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    prop_values_ref = metric(df_resp_ref["min_offer"], df_prop_ref["offer_dss"])
    prop_value_ref = metrics.get_mean(prop_values_ref)
    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(pd.Series(prop_values_ref), pd.Series(prop_dss_values),
                                 test='chi-square')
        s, p, r = res.results.values
        test_label = "(pearson chi2)"
        print("Conclusion: ",
              generate_cat_stat_sentence(np.mean(prop_values_ref), np.std(prop_values_ref),
                                         np.mean(prop_dss_values), np.std(prop_dss_values),
                                         s, p, dof, diff=diff,
                                         label1="t12a.dss", label2=treatment + ".dss"))
    else:
        table, res = rp.ttest(pd.Series(prop_values_ref), pd.Series(prop_dss_values),
                              paired=False)
        s = res.results[2]
        if alternative == "greater":
            p = res.results[4]
        elif alternative == "less":
            p = res.results[5]
        elif alternative in (None, 'two-sided'):
            p = res.results[3]
        r = res.results[9]
        diff = res.results[0]
        dof = res.results[1]
        print("Conclusion: ",
              generate_stat_sentence(np.mean(prop_values_ref), np.std(prop_values_ref),
                                     np.mean(prop_dss_values), np.std(prop_dss_values),
                                     s, p, dof, diff=diff,
                                     label1="t12a.dss", label2=treatment + ".dss"))
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("RESUME: ", res)
    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
            "T12A (ref)": f'{100 * prop_value_ref:.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{prop_dss_value:.2f}',
            "T12A (ref)": f'{prop_value_ref:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def get_rel_responder_min_offer(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "resp":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop[df_resp.columns] = df_resp
    _, df_resp_ref = get_prop_resp("t12a")
    resp_values = df_resp["min_offer_final"]
    resp_ref_values = df_resp["min_offer"]
    # Paired comparison: final vs. initial min_offer within the same subjects.
    table, res = rp.ttest(pd.Series(resp_values), pd.Series(resp_ref_values), paired=True)
    diff = res.results[0]
    dof = res.results[1]
    s = res.results[2]
    p = res.results[3]
    r = res.results[9]
    print(res)
    print("Conclusion: ",
          generate_stat_sentence(np.mean(resp_ref_values), np.std(resp_ref_values),
                                 np.mean(resp_values), np.std(resp_values),
                                 s, p, dof, diff=diff,
                                 label1=treatment, label2=treatment + ".dss"))
    # Additional checks (computed but not reported in the returned summary).
    resp_stat = stats.ttest_rel(df_resp["min_offer"], df_resp["min_offer_final"])
    resp_stat_t00 = stats.ttest_ind(df_resp["min_offer_final"], df_resp_ref["min_offer"])
    resp_wc_stat = stats.wilcoxon(df_resp["min_offer"], df_resp["min_offer_final"])
    res = {
        "mean T12": metrics.get_mean(df_resp["min_offer"]),
        "mean T13": metrics.get_mean(df_resp["min_offer_final"]),
    }
    test_label = "(ttest dependent) H0: equal"
    res = {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in res.items()
    }
    res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
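# Illustrative sketch (synthetic data): the function above mixes a paired
# test (initial vs. final min_offer, same subjects) with independent tests
# against a reference treatment; this shows the distinction with scipy.
def _demo_paired_vs_independent():
    rng = np.random.default_rng(0)
    before = rng.normal(50, 10, size=40)         # e.g. initial min_offer
    after = before + rng.normal(2, 5, size=40)   # e.g. min_offer_final
    other = rng.normal(50, 10, size=55)          # e.g. a reference treatment
    print(stats.ttest_rel(before, after))        # paired: same subjects twice
    print(stats.ttest_ind(after, other))         # independent: different samples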
# Variant: within-treatment comparison including the automated-DSS offer,
# tested with a one-sided Wilcoxon signed-rank test.
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                metric_is_ratio=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    prop_values = metric(df_resp["min_offer"], df_prop["offer"])
    prop_value = metrics.get_mean(prop_values)
    prop_dss_values = metric(df_resp["min_offer"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    auto_dss_values = metric(df_resp["min_offer"], df_prop["ai_offer"])
    auto_dss_value = metrics.get_mean(auto_dss_values)
    if metric_is_ratio:
        # A proportions z-test would be appropriate here; not implemented yet.
        s, p = 0, 0
    else:
        assert prop_values.shape[0] == prop_dss_values.shape[0]
        s, p = stats.wilcoxon(prop_values, prop_dss_values,
                              alternative=alternative or 'greater')
    if as_percentage:
        res = {
            "Proposer": f'{100 * prop_value:.2f} %',
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
            "Auto DSS": f'{100 * auto_dss_value:.2f} %',
            "prop:dss - prop": f'{100 * (prop_dss_value - prop_value):.2f} %',
        }
    else:
        res = {
            "Proposer": f'{prop_value:.2f}',
            "Proposer + DSS": f'{prop_dss_value:.2f}',
            "Auto DSS": f'{auto_dss_value:.2f}',
            "prop:dss - prop": f'{(prop_dss_value - prop_value):.2f}',
        }
    res[f"$H_0$: equal, $H_a$: {alternative or 'greater'}"] = f"{s:.3f} ({p:.3f})"
    print("RES: ", res)
    return res
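# Illustrative sketch (synthetic data): the one-sided Wilcoxon signed-rank
# test used above on paired offer/offer_dss values.
def _demo_wilcoxon_one_sided():
    rng = np.random.default_rng(1)
    offer = rng.normal(50, 10, size=30)
    offer_dss = offer + rng.normal(3, 4, size=30)
    # Ha: the first sample tends to exceed the second ('greater').
    s, p = stats.wilcoxon(offer_dss, offer, alternative='greater')
    print(f"W = {s:.1f}, p = {p:.3f}")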
def get_prop_summary(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "prop":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t00, _ = get_prop_resp("t00")
    resp_stat = stats.ttest_rel(df_prop["offer"], df_prop["offer_final"])
    print("REF to t00", stats.ttest_ind(df_prop_t00["offer"], df_prop["offer_final"]))
    resp_stat_t00 = stats.ttest_ind(df_prop["offer_final"], df_prop_t00["offer"])
    resp_wc_stat = stats.wilcoxon(df_prop["offer"], df_prop["offer_final"])
    cm = sms.CompareMeans.from_data(df_prop["offer"], df_prop["offer_final"])
    print(treatment, "CHECK: ", cm.tconfint_diff(usevar="unequal"))
    res = {
        "n": df_prop["offer"].shape[0],
        "mean (initial)": metrics.get_mean(df_prop["offer"]),
        "mean": metrics.get_mean(df_prop["offer_final"]),
        "median": df_prop["offer_final"].median(),
        "mode": df_prop["offer_final"].mode()[0],
        "standard deviation (initial)": metrics.get_std(df_prop["offer"]),
        "standard deviation": metrics.get_std(df_prop["offer_final"]),
        "stat": resp_stat[0],
        "p-value": resp_stat[1],
        "stat-t00": resp_stat_t00[0],
        "p-value-t00": resp_stat_t00[1],
        "stat-wc": resp_wc_stat[0],
        "p-value-wc": resp_wc_stat[1],
    }
    return {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in res.items()
    }
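# Illustrative sketch (synthetic data): the Welch-style confidence interval
# for a difference in means that get_prop_summary prints via CompareMeans.
def _demo_compare_means_ci():
    rng = np.random.default_rng(2)
    x1 = rng.normal(45, 12, size=60)
    x2 = rng.normal(50, 15, size=60)
    cm = sms.CompareMeans.from_data(x1, x2)
    low, high = cm.tconfint_diff(usevar="unequal")  # CI for mean(x1) - mean(x2)
    print(f"95% CI for the difference in means: [{low:.2f}, {high:.2f}]")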
# NOTE: shadows the earlier definition of the same name (responder summary).
def get_rel_responder_min_offer(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "resp":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop[df_resp.columns] = df_resp
    _, df_resp_t00 = get_prop_resp("t00")
    resp_stat = stats.ttest_rel(df_resp["min_offer"], df_resp["min_offer_final"])
    print("REF to t00", stats.ttest_ind(df_resp_t00["min_offer"], df_resp["min_offer_final"]))
    resp_stat_t00 = stats.ttest_ind(df_resp["min_offer_final"], df_resp_t00["min_offer"])
    resp_wc_stat = stats.wilcoxon(df_resp["min_offer"], df_resp["min_offer_final"])
    res = {
        "n": df_resp["min_offer"].shape[0],
        "mean (initial)": metrics.get_mean(df_resp["min_offer"]),
        "mean": metrics.get_mean(df_resp["min_offer_final"]),
        "median": df_resp["min_offer_final"].median(),
        "mode": df_resp["min_offer_final"].mode()[0],
        "standard deviation (initial)": metrics.get_std(df_resp["min_offer"]),
        "standard deviation": metrics.get_std(df_resp["min_offer_final"]),
        "stat": resp_stat[0],
        "p-value": resp_stat[1],
        "stat-t00": resp_stat_t00[0],
        "p-value-t00": resp_stat_t00[1],
        "stat-wc": resp_wc_stat[0],
        "p-value-wc": resp_wc_stat[1],
    }
    return {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in res.items()
    }
def get_rel_responder_abs_df(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if SELECTION != "resp":
        return
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop[df_resp.columns] = df_resp
    df_prop_ref, df_resp_ref = get_prop_resp("t12a")
    resp_values = metrics.get_data(metrics.get_rel_responder_abs_df(df_prop))
    resp_ref_values = metrics.get_data(metrics.get_rel_responder_abs_df(df_prop_ref))
    table, res = rp.ttest(pd.Series(resp_values), pd.Series(resp_ref_values), paired=False)
    diff = res.results[0]
    dof = res.results[1]
    s = res.results[2]
    p = res.results[3]
    r = res.results[9]
    print("Conclusion: ",
          generate_stat_sentence(np.mean(resp_ref_values), np.std(resp_ref_values),
                                 np.mean(resp_values), np.std(resp_values),
                                 s, p, dof, diff=diff,
                                 label1="t12.dss", label2=treatment + ".dss"))
    print("Table:", table)
    print("Res:", res)
    res = {
        "rel. min_offer T12": metrics.get_mean(resp_ref_values),
        "rel. min_offer T13": metrics.get_mean(resp_values),
    }
    test_label = "(ttest independent) H0: equal"
    res = {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in res.items()
    }
    res["min_offer " + test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
# Variant: proposer + DSS vs. the t20a autonomous-DSS baseline.
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t20, df_resp_t20 = get_prop_resp("t20a")
    prop_dss_values = metric(df_resp["min_offer_dss"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    auto_dss_values = metric(df_resp_t20["min_offer_dss"], df_prop_t20["ai_offer"])
    auto_dss_value = metrics.get_mean(auto_dss_values)
    dof = 0
    diff = None
    if is_categorical:
        # Paired binary outcomes; cross-checked with http://vassarstats.net/propcorr.html
        table, res = rp.crosstab(prop_dss_values, auto_dss_values, test='mcnemar')
        s, p, r = res.results.values
        test_label = "(mcnemar) H0: equal, Ha: two-sided"
        print("Conclusion: ",
              generate_cat_stat_sentence(np.mean(prop_dss_values), np.std(prop_dss_values),
                                         np.mean(auto_dss_values), np.std(auto_dss_values),
                                         s, p, dof, diff=diff,
                                         label1=treatment + ".dss", label2="t20.dss"))
    else:
        table, res = rp.ttest(pd.Series(prop_dss_values), pd.Series(auto_dss_values),
                              paired=False)
        test_label = f"(ttest independent) H0: equal, Ha: {alternative or 'two-sided'}"
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        p = res.results[3]
        r = res.results[9]
        print("Conclusion: ",
              generate_stat_sentence(np.mean(prop_dss_values), np.std(prop_dss_values),
                                     np.mean(auto_dss_values), np.std(auto_dss_values),
                                     s, p, dof, diff=diff,
                                     label1=treatment + ".dss", label2="t20.dss"))
    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
            "T20 Auto DSS": f'{100 * auto_dss_value:.2f} %',
            "prop:dss - auto prop": f'{100 * (prop_dss_value - auto_dss_value):.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{prop_dss_value:.2f}',
            "T20 Auto DSS": f'{auto_dss_value:.2f}',
            "prop:dss - auto prop": f'{(prop_dss_value - auto_dss_value):.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
# Variant: within-treatment comparison of the proposer's own offer vs. the
# offer after DSS use (paired samples).
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    prop_values = metric(df_resp["min_offer"], df_prop["offer"])
    prop_value = metrics.get_mean(prop_values)
    prop_dss_values = metric(df_resp["min_offer"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    auto_dss_values = metric(df_resp["min_offer"], df_prop["ai_offer"])
    auto_dss_value = metrics.get_mean(auto_dss_values)
    dof = 0
    diff = None
    print(metric)
    if is_categorical:
        # Paired binary outcomes; cross-checked with http://vassarstats.net/propcorr.html
        table, res = rp.crosstab(prop_values, prop_dss_values, test='mcnemar')
        s, p, r = res.results.values
        test_label = "(mcnemar - chi2)"
        print("Conclusion: ",
              generate_cat_stat_sentence(np.mean(prop_values), np.std(prop_values),
                                         np.mean(prop_dss_values), np.std(prop_dss_values),
                                         s, p, dof, diff=diff,
                                         label1=treatment, label2=treatment + ".dss"))
    else:
        table, res = rp.ttest(pd.Series(prop_values), pd.Series(prop_dss_values),
                              paired=True)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        p = res.results[3]
        r = res.results[9]
        test_label = "(ttest dependent)"
        print("Conclusion: ",
              generate_stat_sentence(np.mean(prop_values), np.std(prop_values),
                                     np.mean(prop_dss_values), np.std(prop_dss_values),
                                     s, p, dof, diff=diff,
                                     label1=treatment, label2=treatment + ".dss"))
    print("TABLE:", table)
    print("RES:", res)
    if as_percentage:
        res = {
            "Proposer": f'{100 * prop_value:.2f} %',
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
        }
    else:
        res = {
            "Proposer": f'{prop_value:.2f}',
            "Proposer + DSS": f'{prop_dss_value:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
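# Illustrative sketch: McNemar's test on a 2x2 table of paired binary
# outcomes, the test the categorical branch above delegates to researchpy.
# Counts are made up; statsmodels is used directly here.
def _demo_mcnemar():
    from statsmodels.stats.contingency_tables import mcnemar
    # Rows: proposer alone (0/1); columns: proposer + DSS (0/1).
    table = np.array([[30, 12],
                      [5, 53]])
    result = mcnemar(table, exact=False, correction=False)
    print(f"chi2 = {result.statistic:.3f}, p = {result.pvalue:.3f}")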
# Variant: treatment DSS vs. the t11a reference treatment.
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp("t11a")
    print(metric.__name__)
    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)
    metric_ref_values = metric(df_prop_ref)
    metric_value_ref = metrics.get_mean(metric_ref_values)
    metric_values = metrics.get_data(metric_values)
    metric_ref_values = metrics.get_data(metric_ref_values)
    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(pd.Series(metric_ref_values), pd.Series(metric_values),
                                 test='chi-square')
        s, p, r = res.results.values
        print("Conclusion: ",
              generate_cat_stat_sentence(np.mean(metric_ref_values), np.std(metric_ref_values),
                                         np.mean(metric_values), np.std(metric_values),
                                         s, p, dof, diff=diff,
                                         label1="t11a.dss", label2=treatment + ".dss"))
        test_label = "(pearson chi2)"
    else:
        table, res = rp.ttest(pd.Series(metric_ref_values), pd.Series(metric_values),
                              paired=False)
        s = res.results[2]
        if alternative == "greater":
            p = res.results[4]
        elif alternative == "less":
            p = res.results[5]
        elif alternative in (None, 'two-sided'):
            p = res.results[3]
        r = res.results[9]
        diff = res.results[0]
        dof = res.results[1]
        print("Conclusion: ",
              generate_stat_sentence(np.mean(metric_ref_values), np.std(metric_ref_values),
                                     np.mean(metric_values), np.std(metric_values),
                                     s, p, dof, diff=diff,
                                     label1="t11a.dss", label2=treatment + ".dss"))
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("RESUME: ", res)
    print("TABLE: ", table)
    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            "T11A": f'{100 * metric_value_ref:.2f} %',
            "prop:dss - auto prop": f'{100 * (metric_value - metric_value_ref):.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{metric_value:.2f}',
            "T11A": f'{metric_value_ref:.2f}',
            "prop:dss - auto prop": f'{(metric_value - metric_value_ref):.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def get_info_accuracy(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    if treatment in ("t13a", "t13"):
        ref = "t12a"
    elif treatment in ("t11a", "t11b"):
        ref = "t10b"
    else:
        ref = treatment
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp(ref)
    if SELECTION == "prop":
        values = df_prop["feedback_accuracy"]
        values_ref = df_prop_ref["feedback_accuracy"]
    else:
        values = df_resp["feedback_fairness"]
        values_ref = df_resp_ref["feedback_fairness"]
    # Map Likert labels to their numeric codes.
    values_ref = values_ref.apply(lambda x: AI_FEEDBACK_ACCURACY_SCALAS_REV.get(x, x))
    values = values.apply(lambda x: AI_FEEDBACK_ACCURACY_SCALAS_REV.get(x, x))
    print("MEDIAN: ", values.median(), values_ref.median())
    dof = 0
    diff = 0
    table, res = rp.crosstab(pd.Series(values), pd.Series(values_ref), test='g-test')
    s, p, r = res.results.values
    # Rank-based check on the ordinal codes.
    tmp_res = stats.mannwhitneyu(values, values_ref, use_continuity=False)
    print("TMP values: ", tmp_res)
    print("Conclusion: ",
          generate_stat_sentence(np.mean(values_ref), np.std(values_ref),
                                 np.mean(values), np.std(values),
                                 s, p, dof, diff=diff,
                                 label1=ref, label2=treatment))
    print("Table:", table)
    print("Res:", res)
    res = {
        f"mean accuracy {ref}": metrics.get_mean(values_ref),
        f"mean accuracy {treatment}": metrics.get_mean(values),
    }
    test_label = "(g-test)"
    res = {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in res.items()
    }
    res["accuracy " + test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
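# Illustrative sketch (synthetic ordinal data): the Mann-Whitney U check that
# get_info_accuracy prints alongside the g-test, on Likert-coded ratings.
def _demo_mannwhitneyu_ordinal():
    rng = np.random.default_rng(3)
    ratings_a = rng.integers(1, 6, size=50)   # hypothetical 1-5 accuracy ratings
    ratings_b = rng.integers(1, 6, size=50)   # hypothetical reference ratings
    u, p = stats.mannwhitneyu(ratings_a, ratings_b, use_continuity=False,
                              alternative='two-sided')
    print(f"U = {u:.1f}, p = {p:.3f}")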
# Variant: treatment DSS vs. the t10a reference treatment.
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t10, df_resp_t10 = get_prop_resp("t10a")
    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)
    metric_t10_values = metric(df_prop_t10)
    metric_value_t10 = metrics.get_mean(metric_t10_values)
    metric_values = metrics.get_data(metric_values)
    metric_t10_values = metrics.get_data(metric_t10_values)
    dof = 0
    diff = None
    print(metric.__name__)
    if is_categorical:
        table, res = rp.crosstab(pd.Series(metric_values), pd.Series(metric_t10_values),
                                 test='fisher')
        s = res.results[0]
        p = res.results[1]
        r = res.results[4]
        test_label = "(fisher exact)"
        print("Conclusion: ",
              generate_cat_stat_sentence(np.mean(metric_t10_values), np.std(metric_t10_values),
                                         np.mean(metric_values), np.std(metric_values),
                                         s, p, dof, diff=diff,
                                         label1="t10a.dss", label2=treatment + ".dss"))
        print(pd.crosstab(pd.Series(metric_t10_values), pd.Series(metric_values)))
    else:
        table, res = rp.ttest(pd.Series(metric_t10_values), pd.Series(metric_values),
                              paired=False)
        s = res.results[2]
        if alternative == "greater":
            p = res.results[4]
        elif alternative == "less":
            p = res.results[5]
        elif alternative in (None, 'two-sided'):
            p = res.results[3]
        r = res.results[9]
        diff = res.results[0]
        dof = res.results[1]
        print("Conclusion: ",
              generate_stat_sentence(np.mean(metric_t10_values), np.std(metric_t10_values),
                                     np.mean(metric_values), np.std(metric_values),
                                     s, p, dof, diff=diff,
                                     label1="t10a.dss", label2=treatment + ".dss"))
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("TABLE: ", table)
    print("TEST: ", res)
    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            "T10": f'{100 * metric_value_t10:.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{metric_value:.2f}',
            "T10": f'{metric_value_t10:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res