def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    """Compare DSS-assisted proposer scores of ``treatment`` with treatment "t12a".

    ``metric`` is applied to the responders' ``min_offer`` column and the
    proposers' ``offer_dss`` column, once for ``treatment`` and once for the
    "t12a" reference. The two samples are compared with a chi-square test
    (``is_categorical`` truthy) or an independent t-test (otherwise); a
    summary sentence is printed and the formatted means plus the test
    outcome are returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.
        metric: Callable invoked as ``metric(min_offer, offer)``.
        as_percentage: If truthy, means are scaled by 100 and suffixed " %".
        is_categorical: Selects the chi-square test instead of the t-test.
        alternative: ``None``/``'two-sided'``, ``'greater'`` or ``'less'``;
            selects which t-test p-value is reported.

    Returns:
        dict mapping column labels to formatted strings.

    Raises:
        ValueError: If ``alternative`` is not one of the supported values
            (t-test branch only).
    """
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp("t12a")

    prop_dss_values = metric(df_resp["min_offer"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    prop_values_ref = metric(df_resp_ref["min_offer"], df_prop_ref["offer_dss"])
    prop_value_ref = metrics.get_mean(prop_values_ref)

    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(pd.Series(prop_values_ref), pd.Series(prop_dss_values), test='chi-square')
        s, p, r = res.results.values
        test_label = "chi2"
        sentence = generate_cat_stat_sentence
    else:
        # Row positions of the one-/two-sided p-values in the t-test results.
        # NOTE(review): assumes researchpy's ttest row layout - confirm for the
        # installed version.
        p_value_rows = {None: 3, "two-sided": 3, "greater": 4, "less": 5}
        if alternative not in p_value_rows:
            raise ValueError(f"unsupported alternative: {alternative!r}")
        table, res = rp.ttest(pd.Series(prop_values_ref), pd.Series(prop_dss_values), paired=False)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        p = res.results[p_value_rows[alternative]]
        r = res.results[9]
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
        sentence = generate_stat_sentence

    # Describe the two samples that were actually tested: reference first,
    # matching the argument order given to the test above.
    print("Conclusion: ", sentence(
        np.mean(prop_values_ref), np.std(prop_values_ref),
        np.mean(prop_dss_values), np.std(prop_dss_values),
        s, p, dof, diff=diff, label1="t12a.dss", label2=treatment + ".dss"))
    print("RESUME: ", res)

    # NOTE(review): the "T10" key holds the "t12a" reference mean; the key is
    # left unchanged because callers may rely on it.
    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
            "T10": f'{100 * prop_value_ref:.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{prop_dss_value:.2f}',
            "T10": f'{prop_value_ref:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def get_rel_responder_min_offer(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    """Compare responders' initial and final minimum offers within ``treatment``.

    Runs a paired t-test between the ``min_offer_final`` and ``min_offer``
    columns of the responder frame, prints the test result and a summary
    sentence, and returns the two means plus the formatted test outcome.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.

    Returns:
        dict of formatted results, or ``None`` when ``SELECTION`` is not
        ``"resp"``.
    """
    if SELECTION != "resp":
        return None

    df_prop, df_resp = get_prop_resp(treatment)
    # Kept from the original: copies the responder columns into df_prop.
    # NOTE(review): df_prop is not read afterwards - confirm whether other
    # code relies on this mutation.
    df_prop[df_resp.columns] = df_resp

    resp_values = df_resp["min_offer_final"]
    resp_ref_values = df_resp["min_offer"]

    table, res = rp.ttest(pd.Series(resp_values), pd.Series(resp_ref_values), paired=True)
    diff = res.results[0]
    dof = res.results[1]
    s = res.results[2]
    p = res.results[3]
    r = res.results[9]
    print(res)
    print(
        "Conclusion: ",
        generate_stat_sentence(
            np.mean(resp_ref_values), np.std(resp_ref_values),
            np.mean(resp_values), np.std(resp_values),
            s, p, dof, diff=diff, label1=treatment, label2=treatment + ".dss"))

    summary = {
        "mean T12": metrics.get_mean(resp_ref_values),
        "mean T13": metrics.get_mean(resp_values),
    }
    # Whole numbers and nulls are passed through; other values get 3 decimals.
    summary = {
        k: (f"{v:.3f}" if pd.notnull(v) and v != int(v) else v)
        for k, v in summary.items()
    }
    # The test above is paired, so it is reported as a dependent t-test.
    summary["(ttest dependent) H0: equal"] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return summary
def get_rel_responder_abs_df(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    """Compare ``metrics.get_rel_responder_abs_df`` between ``treatment`` and "t12a".

    The responder columns are merged into the proposer frame of ``treatment``
    before the metric is computed; the "t12a" proposer frame is used as
    returned by ``get_prop_resp``. Both samples go through an independent
    t-test; the test output is printed and the means plus the formatted test
    outcome are returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.

    Returns:
        dict of formatted results, or ``None`` when ``SELECTION`` is not
        ``"resp"``.
    """
    if SELECTION != "resp":
        return

    def _fmt(value):
        # Whole numbers and nulls are passed through; others get 3 decimals.
        if pd.notnull(value) and value != int(value):
            return f"{value:.3f}"
        return value

    proposers, responders = get_prop_resp(treatment)
    proposers[responders.columns] = responders
    reference_proposers, _ = get_prop_resp("t12a")

    sample = metrics.get_data(metrics.get_rel_responder_abs_df(proposers))
    reference = metrics.get_data(metrics.get_rel_responder_abs_df(reference_proposers))

    table, test = rp.ttest(pd.Series(sample), pd.Series(reference), paired=False)
    outcome = test.results
    diff, dof, s, p, r = outcome[0], outcome[1], outcome[2], outcome[3], outcome[9]

    conclusion = generate_stat_sentence(
        np.mean(reference), np.std(reference),
        np.mean(sample), np.std(sample),
        s, p, dof, diff=diff, label1="t12.dss", label2=treatment + ".dss")
    print("Conclusion: ", conclusion)
    print("Table:", table)
    print("Res:", test)

    means = {
        "rel. min_offer T12": metrics.get_mean(reference),
        "rel. min_offer T13": metrics.get_mean(sample),
    }
    summary = {}
    for key, value in means.items():
        summary[key] = _fmt(value)
    summary["min_offer(ttest independent) H0: equal"] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    print()
    return summary
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    """Compare DSS-assisted proposer scores of ``treatment`` with the "t20a" AI offers.

    ``metric`` is applied to ``min_offer_dss``/``offer_dss`` of ``treatment``
    and to ``min_offer_dss``/``ai_offer`` of "t20a". The samples are compared
    with a McNemar test (``is_categorical`` truthy) or an independent t-test
    (otherwise); a summary sentence is printed and the formatted means, their
    difference and the test outcome are returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.
        metric: Callable invoked as ``metric(min_offer, offer)``.
        as_percentage: If truthy, values are scaled by 100 and suffixed " %".
        is_categorical: Selects the McNemar test instead of the t-test.
        alternative: Accepted for signature compatibility; the reported
            p-value is always the two-sided one.

    Returns:
        dict mapping column labels to formatted strings.
    """
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t20, df_resp_t20 = get_prop_resp("t20a")

    prop_dss_values = metric(df_resp["min_offer_dss"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)
    auto_dss_values = metric(df_resp_t20["min_offer_dss"], df_prop_t20["ai_offer"])
    auto_dss_value = metrics.get_mean(auto_dss_values)

    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(prop_dss_values, auto_dss_values, test='mcnemar')
        s, p, r = res.results.values
        test_label = "(mcnemar) H0: equal, Ha: two-sided"
        sentence = generate_cat_stat_sentence
    else:
        table, res = rp.ttest(pd.Series(prop_dss_values), pd.Series(auto_dss_values), paired=False)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        p = res.results[3]
        r = res.results[9]
        # The label names the test that is actually run (an independent
        # t-test reporting the two-sided p-value), not the Wilcoxon test it
        # replaced.
        test_label = "(ttest independent) H0: equal, Ha: two-sided"
        sentence = generate_stat_sentence

    print(
        "Conclusion: ",
        sentence(
            np.mean(prop_dss_values), np.std(prop_dss_values),
            np.mean(auto_dss_values), np.std(auto_dss_values),
            s, p, dof, diff=diff, label1=treatment + ".dss", label2="t20.dss"))

    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
            "T20 Auto DSS": f'{100 * auto_dss_value:.2f} %',
            "prop:dss - auto prop": f'{100 * (prop_dss_value - auto_dss_value):.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{prop_dss_value:.2f}',
            "T20 Auto DSS": f'{auto_dss_value:.2f}',
            # Raw difference: no percent sign, since nothing is scaled here.
            "prop:dss - auto prop": f'{(prop_dss_value - auto_dss_value):.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    """Compare proposer scores without and with the DSS inside ``treatment``.

    ``metric`` is applied to the responders' ``min_offer`` column paired with
    the proposers' ``offer`` and ``offer_dss`` columns. The two samples are
    compared with a McNemar test (``is_categorical`` truthy) or a paired
    t-test (otherwise); a summary sentence and the test output are printed,
    and the formatted means plus the test outcome are returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.
        metric: Callable invoked as ``metric(min_offer, offer)``.
        as_percentage: If truthy, means are scaled by 100 and suffixed " %".
        is_categorical: Selects the McNemar test instead of the t-test.
        alternative: Accepted for signature compatibility; the reported
            p-value is always the two-sided one.

    Returns:
        dict mapping column labels to formatted strings.
    """
    df_prop, df_resp = get_prop_resp(treatment)

    prop_values = metric(df_resp["min_offer"], df_prop["offer"])
    prop_value = metrics.get_mean(prop_values)
    prop_dss_values = metric(df_resp["min_offer"], df_prop["offer_dss"])
    prop_dss_value = metrics.get_mean(prop_dss_values)

    dof = 0
    diff = None
    print(metric)
    if is_categorical:
        table, res = rp.crosstab(prop_values, prop_dss_values, test='mcnemar')
        s, p, r = res.results.values
        test_label = "(mcnemar - chi2)"
        # Categorical outcome: use the categorical sentence generator, as the
        # other categorical comparisons in this file do.
        sentence = generate_cat_stat_sentence
    else:
        table, res = rp.ttest(pd.Series(prop_values), pd.Series(prop_dss_values), paired=True)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        p = res.results[3]
        r = res.results[9]
        test_label = "(ttest dependent)"
        sentence = generate_stat_sentence

    print("Conclusion: ", sentence(
        np.mean(prop_values), np.std(prop_values),
        np.mean(prop_dss_values), np.std(prop_dss_values),
        s, p, dof, diff=diff, label1=treatment, label2=treatment + ".dss"))
    print("TABLE:", table)
    print("RES:", res)

    if as_percentage:
        res = {
            "Proposer": f'{100 * prop_value:.2f} %',
            "Proposer + DSS": f'{100 * prop_dss_value:.2f} %',
        }
    else:
        res = {
            "Proposer": f'{prop_value:.2f}',
            "Proposer + DSS": f'{prop_dss_value:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    """Compare a proposer-frame metric of ``treatment`` with treatment "t11a".

    ``metric`` is applied to the proposer frame of each treatment. The two
    samples are compared with a chi-square test (``is_categorical`` truthy)
    or an independent t-test (otherwise); a summary sentence and the test
    output are printed, and the formatted means, their difference and the
    test outcome are returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.
        metric: Callable invoked as ``metric(df_prop)``; its ``__name__`` is
            printed.
        as_percentage: If truthy, values are scaled by 100 and suffixed " %".
        is_categorical: Selects the chi-square test instead of the t-test.
        alternative: ``None``/``'two-sided'``, ``'greater'`` or ``'less'``;
            selects which t-test p-value is reported.

    Returns:
        dict mapping column labels to formatted strings.

    Raises:
        ValueError: If ``alternative`` is not one of the supported values
            (t-test branch only).
    """
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp("t11a")
    print(metric.__name__)

    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)
    metric_ref_values = metric(df_prop_ref)
    metric_value_ref = metrics.get_mean(metric_ref_values)

    metric_values = metrics.get_data(metric_values)
    metric_ref_values = metrics.get_data(metric_ref_values)

    dof = 0
    diff = None
    if is_categorical:
        table, res = rp.crosstab(pd.Series(metric_ref_values), pd.Series(metric_values), test='chi-square')
        s, p, r = res.results.values
        test_label = "(pearson chi2)"
        sentence = generate_cat_stat_sentence
    else:
        # Row positions of the one-/two-sided p-values in the t-test results.
        # NOTE(review): assumes researchpy's ttest row layout - confirm for the
        # installed version.
        p_value_rows = {None: 3, "two-sided": 3, "greater": 4, "less": 5}
        if alternative not in p_value_rows:
            raise ValueError(f"unsupported alternative: {alternative!r}")
        table, res = rp.ttest(pd.Series(metric_ref_values), pd.Series(metric_values), paired=False)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        # Report the p-value that matches the hypothesis named in the label;
        # it must not be replaced by the two-sided value afterwards.
        p = res.results[p_value_rows[alternative]]
        r = res.results[9]
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
        sentence = generate_stat_sentence

    print(
        "Conclusion: ",
        sentence(
            np.mean(metric_ref_values), np.std(metric_ref_values),
            np.mean(metric_values), np.std(metric_values),
            s, p, dof, diff=diff, label1="t11a.dss", label2=treatment + ".dss"))
    print("RESUME: ", res)
    print("TABLE: ", table)

    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            # NOTE(review): this key has a trailing space, unlike "T11A" in
            # the other branch; left unchanged because callers may rely on it.
            "T11A ": f'{100 * metric_value_ref:.2f} %',
            "prop:dss - auto prop": f'{100 * (metric_value - metric_value_ref):.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{metric_value:.2f}',
            "T11A": f'{metric_value_ref:.2f}',
            # Raw difference: no percent sign, since nothing is scaled here.
            "prop:dss - auto prop": f'{(metric_value - metric_value_ref):.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res
def get_info_accuracy(treatment, con, dfs=None, use_percentage=None, use_labels=None):
    """Compare feedback ratings of ``treatment`` with those of a reference treatment.

    The reference is "t12a" for "t13a"/"t13", "t10b" for "t11a"/"t11b", and
    ``treatment`` itself otherwise. With ``SELECTION == "prop"`` the
    proposers' ``feedback_accuracy`` column is used, otherwise the
    responders' ``feedback_fairness`` column. Ratings are mapped through
    ``AI_FEEDBACK_ACCURACY_SCALAS_REV`` (unknown values pass through) and
    compared with a g-test; a Mann-Whitney U result is printed as well.

    NOTE(review): the printed label "t12.dss" and the result keys
    ("rel. min_offer ...", "(ttest independent) ...") do not match what is
    computed here; they are kept as-is because the intended names cannot be
    determined from this function alone.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.

    Returns:
        dict of formatted means plus the formatted test outcome.
    """
    reference_for = {"t13a": "t12a", "t13": "t12a", "t11a": "t10b", "t11b": "t10b"}
    ref = reference_for.get(treatment, treatment)

    def _decode(rating):
        return AI_FEEDBACK_ACCURACY_SCALAS_REV.get(rating, rating)

    def _fmt(value):
        # Whole numbers and nulls are passed through; others get 3 decimals.
        if pd.notnull(value) and value != int(value):
            return f"{value:.3f}"
        return value

    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_ref, df_resp_ref = get_prop_resp(ref)

    if SELECTION == "prop":
        column, frame, frame_ref = "feedback_accuracy", df_prop, df_prop_ref
    else:
        column, frame, frame_ref = "feedback_fairness", df_resp, df_resp_ref
    values = frame[column]
    values_ref = frame_ref[column]

    values_ref = values_ref.apply(_decode)
    values = values.apply(_decode)
    print("MEDIAN: ", values.median(), values_ref.median())

    dof = 0
    diff = 0
    table, test = rp.crosstab(pd.Series(values), pd.Series(values_ref), test='g-test')
    s, p, r = test.results.values

    rank_test = stats.mannwhitneyu(values, values_ref, use_continuity=False)
    print("TMP values: ", rank_test)

    conclusion = generate_stat_sentence(
        np.mean(values_ref), np.std(values_ref),
        np.mean(values), np.std(values),
        s, p, dof, diff=diff, label1="t12.dss", label2=treatment + ".dss")
    print("Conclusion: ", conclusion)
    print("Table:", table)
    print("Res:", test)

    means = {
        "rel. min_offer T12": metrics.get_mean(values_ref),
        "rel. min_offer T13": metrics.get_mean(values),
    }
    summary = {}
    for key, value in means.items():
        summary[key] = _fmt(value)
    summary["min_offer(ttest independent) H0: equal"] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return summary
def _get_prop_vs_prop_dss_score(treatment, con, dfs=None, use_percentage=None,
                                use_labels=None, metric=None, as_percentage=None,
                                is_categorical=None, alternative=None):
    """Compare a proposer-frame metric of ``treatment`` with treatment "t10a".

    ``metric`` is applied to the proposer frame of each treatment. The two
    samples are compared with Fisher's exact test (``is_categorical`` truthy)
    or an independent t-test (otherwise); a summary sentence and the test
    output are printed, and the formatted means plus the test outcome are
    returned.

    Args:
        treatment: Treatment identifier passed to ``get_prop_resp``.
        con, dfs, use_percentage, use_labels: Not used by this function.
        metric: Callable invoked as ``metric(df_prop)``; its ``__name__`` is
            printed.
        as_percentage: If truthy, means are scaled by 100 and suffixed " %".
        is_categorical: Selects Fisher's exact test instead of the t-test.
        alternative: ``None``/``'two-sided'``, ``'greater'`` or ``'less'``;
            selects which t-test p-value is reported.

    Returns:
        dict mapping column labels to formatted strings.

    Raises:
        ValueError: If ``alternative`` is not one of the supported values
            (t-test branch only).
    """
    df_prop, df_resp = get_prop_resp(treatment)
    df_prop_t10, df_resp_t10 = get_prop_resp("t10a")

    metric_values = metric(df_prop)
    metric_value = metrics.get_mean(metric_values)
    metric_t10_values = metric(df_prop_t10)
    metric_value_t10 = metrics.get_mean(metric_t10_values)

    metric_values = metrics.get_data(metric_values)
    metric_t10_values = metrics.get_data(metric_t10_values)

    dof = 0
    diff = None
    print(metric.__name__)
    if is_categorical:
        table, res = rp.crosstab(pd.Series(metric_values), pd.Series(metric_t10_values), test='fisher')
        # NOTE(review): rows 0, 1 and 4 are presumably the statistic, the
        # two-sided p-value and the effect size of researchpy's Fisher
        # output - confirm for the installed version.
        s = res.results[0]
        p = res.results[1]
        r = res.results[4]
        # The label names the test that is actually run.
        test_label = "(fisher exact)"
        print(
            "Conclusion: ",
            generate_cat_stat_sentence(
                np.mean(metric_t10_values), np.std(metric_t10_values),
                np.mean(metric_values), np.std(metric_values),
                s, p, dof, diff=diff, label1="t10a.dss", label2=treatment + ".dss"))
        print(pd.crosstab(pd.Series(metric_t10_values), pd.Series(metric_values)))
    else:
        # Row positions of the one-/two-sided p-values in the t-test results.
        # NOTE(review): assumes researchpy's ttest row layout - confirm for the
        # installed version.
        p_value_rows = {None: 3, "two-sided": 3, "greater": 4, "less": 5}
        if alternative not in p_value_rows:
            raise ValueError(f"unsupported alternative: {alternative!r}")
        table, res = rp.ttest(pd.Series(metric_t10_values), pd.Series(metric_values), paired=False)
        diff = res.results[0]
        dof = res.results[1]
        s = res.results[2]
        # Report the p-value that matches the hypothesis named in the label;
        # it must not be replaced by the two-sided value afterwards.
        p = res.results[p_value_rows[alternative]]
        r = res.results[9]
        print(
            "Conclusion: ",
            generate_stat_sentence(
                np.mean(metric_t10_values), np.std(metric_t10_values),
                np.mean(metric_values), np.std(metric_values),
                s, p, dof, diff=diff, label1="t10a.dss", label2=treatment + ".dss"))
        test_label = f"(ttest independent) H0: {'equal' if alternative in {None, 'two-sided'} else alternative}"
    print("TABLE: ", table)
    print("TEST: ", res)

    if as_percentage:
        res = {
            "Proposer + DSS": f'{100 * metric_value:.2f} %',
            "T10": f'{100 * metric_value_t10:.2f} %',
        }
    else:
        res = {
            "Proposer + DSS": f'{metric_value:.2f}',
            "T10": f'{metric_value_t10:.2f}',
        }
    if is_categorical:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, phi: {r:.3f})"
    else:
        res[test_label] = f"{s:.3f} (p: {p:.3f}, r: {r:.3f})"
    return res