def mann_whitney(survey_id, df, independent_variable, dependent_variable, form):
    """Run a Mann-Whitney U test and redirect to the result page.

    The rows of ``df`` are grouped by ``independent_variable`` and the
    ``dependent_variable`` values of the two resulting groups are compared.

    Args:
        survey_id: Identifier forwarded to the result page as ``survey``.
        df: Data frame holding both variables as columns.
        independent_variable: Name of the grouping column.
        dependent_variable: Name of the column whose values are compared.
        form: Form object handed back to the template on validation errors.

    Returns:
        The re-rendered analysis template (after flashing an error) when the
        dependent variable has a string dtype or the independent variable does
        not split the data into exactly two groups; otherwise a redirect to
        ``analysis.result`` carrying the p-value formatted to 4 decimals.
    """
    if is_string_dtype(df[dependent_variable]):
        flash(
            "Dependent Variable '" + dependent_variable + "' is not numeric.",
            "danger")
        return render_template("analysis/analysedata.html", form=form)

    group_by = df.groupby(independent_variable)
    group_array = [group_by.get_group(x) for x in group_by.groups]
    if len(group_array) > 2:
        flash(
            "Independent variable '" + independent_variable +
            "' has too many groups, only 2 allowed for Mann-Whitney U Test.",
            "danger")
        return render_template("analysis/analysedata.html", form=form)
    if len(group_array) < 2:
        # Previously this case was reported as "too many groups".
        flash(
            "Independent variable '" + independent_variable +
            "' has too few groups, exactly 2 required for Mann-Whitney U Test.",
            "danger")
        return render_template("analysis/analysedata.html", form=form)

    x = group_array[0][dependent_variable].values
    y = group_array[1][dependent_variable].values
    mwu_result = mwu(x, y)
    p_value = "%.4f" % mwu_result.at["MWU", "p-val"]
    return redirect(
        url_for('analysis.result',
                survey=survey_id,
                test="Mann-Whitney U Test",
                p_value=p_value,
                independent_variable=independent_variable,
                dependent_variable=dependent_variable))
def plot_synchronies(data_cluster):
    """Plot 'same' vs 'diff' distance histograms annotated with p and d.

    Rows of ``data_cluster`` are split on the ``type`` column ("same" /
    "diff"). The ``distance`` values of both subsets are drawn as overlaid
    density histograms (green = same, red = diff) on the current matplotlib
    axes, and the plot is annotated with the p-value of a one-sided
    Mann-Whitney U test (third argument 'greater', same vs. diff) and
    Cohen's d. The annotation is bold when p < 0.05.

    Args:
        data_cluster: DataFrame with at least the columns ``type`` and
            ``distance``.
    """
    # all rep together
    data_diff = data_cluster.query('type == "diff"')
    data_same = data_cluster.query('type == "same"')

    results_mwu = pg.mwu(data_same['distance'].values,
                         data_diff['distance'].values,
                         'greater')
    # The result frame is label-indexed; use .iloc for positional access
    # (integer keys on a non-integer index are deprecated in pandas >= 2.1).
    p_val = results_mwu['p-val'].iloc[0]
    d = pg.compute_effsize(data_same['distance'],
                           data_diff['distance'],
                           eftype='cohen')

    plt.hist(data_diff['distance'].values, density=True, color='r', alpha=0.25)
    plt.hist(data_same['distance'].values, density=True, color='g', alpha=0.25)
    plt.grid()

    # Place the annotation at 50% of the x-range and 85% of the y-range.
    ca = plt.gca()
    x_min, x_max = ca.get_xlim()
    x = x_min + 0.5 * (x_max - x_min)
    y_min, y_max = ca.get_ylim()
    y = y_min + 0.85 * (y_max - y_min)

    text_style = {'ha': 'center'}
    if p_val < 0.05:
        text_style['fontweight'] = 'bold'
    plt.text(x, y, 'p = {:.3f}, d = {:.3f}'.format(p_val, d), **text_style)
def mwu_test(err_or_dt, base, var, fold_change, min_exp_bound=-float('inf'), ignore_exp=()):
    """Print a Mann-Whitney U test for each pair of consecutive entries.

    The mapping returned by ``get_svd`` is walked in iteration order; for each
    pair of neighbouring entries the two keys are printed, followed by the
    ``pg.mwu`` result computed on the ``fold_change`` column of both values
    and an empty line.

    Args:
        err_or_dt, base, var, min_exp_bound, ignore_exp: Forwarded unchanged
            to ``get_svd``.
        fold_change: Key used to select the compared data from each value.
    """
    steps_var_data = get_svd(err_or_dt, base, var, min_exp_bound, ignore_exp)
    entries = list(steps_var_data.items())

    print("Mann-Whitney U Test ")
    # zip() stops at the shorter sequence, which pairs every entry with its successor.
    for (left_key, left_df), (right_key, right_df) in zip(entries, entries[1:]):
        print(left_key, right_key)
        print(pg.mwu(left_df[fold_change], right_df[fold_change]))
        print()
def get_d_p(data_cluster):
    """Return Cohen's d and a one-sided MWU p-value for TOG vs. SEP distances.

    Rows are split on the ``type`` column ("TOG" / "SEP"); the ``distance``
    values of each subset are passed through ``bootstrap`` before being
    compared.

    Args:
        data_cluster: DataFrame with at least the columns ``type`` and
            ``distance``.

    Returns:
        Tuple ``(d, p_val)``: Cohen's d of the bootstrapped TOG vs. SEP
        samples, and the p-value of ``pg.mwu`` called with 'greater'.
    """
    data_diff = data_cluster.query('type == "SEP"')
    data_same = data_cluster.query('type == "TOG"')

    same_boot = bootstrap(data_same['distance'].values)
    diff_boot = bootstrap(data_diff['distance'].values)

    results_mwu = pg.mwu(same_boot, diff_boot, 'greater')
    # The result frame is label-indexed; use .iloc for positional access
    # (integer keys on a non-integer index are deprecated in pandas >= 2.1).
    p_val = results_mwu['p-val'].iloc[0]
    d = pg.compute_effsize(same_boot, diff_boot, eftype='cohen')
    return (d, p_val)
def binary_test(df, var1, var2):
    """Run a Mann-Whitney U test on ``var2`` split by ``var1``, per difficulty.

    For every distinct value of the ``pres_difficulty`` column, the ``var2``
    values are separated by the distinct values of ``var1`` and the first two
    resulting samples are compared. Progress and results are printed.

    var1: string, binary variable
    var2: string, continuous variable

    Returns the per-difficulty result frames concatenated with ``pd.concat``.
    """
    print(f'Variable of interst: {var1}')
    var1_levels = np.unique(df[var1])
    print(f'Possible values: {var1_levels}')

    per_difficulty = []
    for difficulty in np.unique(df['pres_difficulty']):
        print(f'Difficulty: {difficulty}')
        in_difficulty = df['pres_difficulty'] == difficulty
        samples = [
            df[(df[var1] == level) & in_difficulty][var2].to_numpy()
            for level in var1_levels
        ]
        outcome = pg.mwu(samples[0], samples[1])
        per_difficulty.append(outcome)
        print(outcome)
    return pd.concat(per_difficulty)
def mann_whitney(df, independent_variable, dependent_variable):
    """Run a Mann-Whitney U test between the two groups of a variable.

    Args:
        df: Data frame holding both variables as columns.
        independent_variable: Name of the grouping column. The first two
            groups are compared; the caller is expected to have checked that
            there are exactly two.
        dependent_variable: Name of the column whose values are compared.

    Returns:
        Tuple ``(p_value, result)`` where ``p_value`` is the test's p-value
        rounded to 4 decimals (as a float) and ``result`` is a dict describing
        the test (name, p-value, variables, null hypothesis, assumptions).
    """
    grouped = df.groupby(independent_variable)
    frames = [grouped.get_group(label) for label in grouped.groups]

    # Samples of the first and second group
    sample_1 = frames[0][dependent_variable].values
    sample_2 = frames[1][dependent_variable].values

    # Labels of the two groups, used in the explanatory text below
    labels = list(grouped.groups.keys())
    group_1, group_2 = labels[0], labels[1]

    outcome = mwu(sample_1, sample_2)
    raw_p = outcome['p-val'].values[0]
    # Round-trip through a 4-decimal string to truncate the precision
    p_value = float(format(raw_p, ".4f"))

    null_text = f"The distribution of '{dependent_variable}' is the same across groups of '{independent_variable}'"
    info_text = """Assumes that the dependent variable ('{0}') is ordinal or continuous, that the independent variable ('{1}') consists of just 2 groups ('{2}' and '{3}') and that these groups follow the same distribution (the shape on a histogram).""".format(
        dependent_variable, independent_variable, group_1, group_2)

    result = {
        "test": "Mann-Whitney U Test",
        "p_value": p_value,
        "variable_1": independent_variable,
        "variable_2": dependent_variable,
        "null": null_text,
        "info": info_text,
    }
    return p_value, result
import seaborn as sns
import matplotlib.pyplot as plt
from pingouin import mwu

# data
df = pd.read_csv('mwugenderpercent.csv')
# display(df.head())

# mann-whitney u
# x is the men's row, y is the women's row
# (original note, abbreviated: "null hyp. = significant difference between groups";
#  NOTE(review): per the description at the bottom, H0 is actually "no difference")
# results = mannwhitneyu(df['men'], df['women'])
# display(results)
# results2 = stats.mannwhitneyu(df['men'], df['women'], alternative='two-sided')
# display(results2)
# print('-----------')

# The sidedness is passed positionally: the keyword was called `tail` in old
# pingouin releases and `alternative` in newer ones, so a positional argument
# is the only spelling accepted by both.
results3 = mwu(df['men'], df['women'], 'two-sided')
display(results3)
print('Om p < 0.05 --> signifikant skillnad mellan grupperna.')

###########################################################################################
#In statistics, the Mann–Whitney U test (also called the Mann–Whitney–Wilcoxon (MWW),     #
#Wilcoxon rank-sum test, or Wilcoxon–Mann–Whitney test) is a nonparametric test of the    #
#null hypothesis that it is equally likely that a randomly selected value from one sample #
#will be less than or greater than a randomly selected value from a second sample.        #
#                                                                                         #
#Under the null hypothesis H0, the probability of an observation from the population 𝑋    #
#exceeding an observation from the second population 𝑌 equals the probability of an       #
#observation from 𝑌 exceeding an observation from 𝑋.                                      #
#Two group means are different (two-tailed)                                               #
###########################################################################################
def test_correlations(positive_samples_path, negative_samples_path, attractors_table_path):
    """ Compares seed sentence properties between positive and negative samples and prints significance scores.

    For every sample in the two JSON sample tables, sentence-level attractor scores (FREQ, PMI, PPMI) are looked up
    for the seed sense cluster and for the adversarial sense cluster, normalized by the seed sentence length, and
    collected per metric. Each metric is then compared between the positive and the negative table with a two-sided
    Mann-Whitney U test (in both argument orders); U, p, RBC, the derived Aw values, the group means and the
    effect-size thresholds are printed. Nothing is returned.

    :param positive_samples_path: path to the JSON table of positive samples
    :param negative_samples_path: path to the JSON table of negative samples
    :param attractors_table_path: path to the JSON attractor table (term -> sense cluster -> attractor lists)

    The path strings themselves steer the filtering: the substrings 'true_samples' and 'attractors' are checked below.
    """

    # Read-in sample tables
    print('Reading-in tables ...')
    with open(positive_samples_path, 'r', encoding='utf8') as psp:
        positive_samples = json.load(psp)
    with open(negative_samples_path, 'r', encoding='utf8') as nsp:
        negative_samples = json.load(nsp)

    # Read-in attractor table
    print('Reading-in attractor table ...')
    with open(attractors_table_path, 'r', encoding='utf8') as atp:
        attractors_table = json.load(atp)

    # Find lowest PMI value (used as the PMI score of sentences without any attractor tokens)
    pmi_min_per_term = list()
    for term in attractors_table.keys():
        pmi_min_per_sense = list()
        for sense in attractors_table[term].keys():
            pmi_min_per_sense. \
                append(min([tpl[1] for tpl in attractors_table[term][sense]['[SORTED ATTRACTORS BY PMI]']]))
        pmi_min_per_term.append(min(pmi_min_per_sense))
    pmi_lower_bound = min(pmi_min_per_term)

    # Initialize score cache
    # seed_parses is threaded through _get_sentence_scores, which hands back an updated version as its last item
    seed_parses = dict()
    seed_score_cache = dict()

    metrics = ['[SENT. SEED ATTRACTOR TOKEN FREQ]',
               '[SENT. SEED ATTRACTOR TOKEN PMI]',
               '[SENT. SEED ATTRACTOR TOKEN PPMI]',
               '[SENT. ADV ATTRACTOR TOKEN FREQ]',
               '[SENT. ADV ATTRACTOR TOKEN PMI]',
               '[SENT. ADV ATTRACTOR TOKEN PPMI]',
               '[SENT. ADV-SEED ATTRACTOR TOKEN FREQ DIFF]',
               '[SENT. ADV-SEED ATTRACTOR TOKEN PMI DIFF]',
               '[SENT. ADV-SEED ATTRACTOR TOKEN PPMI DIFF]',
               '[SEED SENTENCE LENGTH]']

    # Collect scores
    print('Looking up scores ...')
    positive_scores = {m: list() for m in metrics}
    negative_scores = {m: list() for m in metrics}

    # Restrict generation strategies
    generation_strategies = []
    # generation_strategies = ['insert_at_homograph', 'replace_at_homograph']

    # Declare function used to compute sentence-level scores
    # sent_fun = np.mean
    sent_fun = sum

    for samples, scores, path in [(positive_samples, positive_scores, positive_samples_path),
                                  (negative_samples, negative_scores, negative_samples_path)]:
        seen_seeds = dict()
        for term in samples.keys():
            print('Looking-up the term \'{:s}\''.format(term))
            for seed_cluster in samples[term].keys():
                # Compute sentence-level scores for the relevant cluster
                seed_sorted_attractor_freq = attractors_table[term][seed_cluster]['[SORTED ATTRACTORS BY FREQ]']
                seed_sorted_attractor_pmi = attractors_table[term][seed_cluster]['[SORTED ATTRACTORS BY PMI]']
                seed_attractor_token_freq_dict = {attr_tpl[0]: attr_tpl[1] for attr_tpl in seed_sorted_attractor_freq}
                seed_attractor_token_pmi_dict = {attr_tpl[0]: attr_tpl[1] for attr_tpl in seed_sorted_attractor_pmi}
                for adv_cluster in samples[term][seed_cluster].keys():
                    # Compute sentence-level scores for the relevant cluster
                    adv_sorted_attractor_freq = attractors_table[term][adv_cluster]['[SORTED ATTRACTORS BY FREQ]']
                    adv_sorted_attractor_pmi = attractors_table[term][adv_cluster]['[SORTED ATTRACTORS BY PMI]']
                    adv_attractor_token_freq_dict = {attr_tpl[0]: attr_tpl[1] for attr_tpl in adv_sorted_attractor_freq}
                    adv_attractor_token_pmi_dict = {attr_tpl[0]: attr_tpl[1] for attr_tpl in adv_sorted_attractor_pmi}
                    for seed_sentence in samples[term][seed_cluster][adv_cluster].keys():
                        for sample in samples[term][seed_cluster][adv_cluster][seed_sentence]:
                            seed_sentence = seed_sentence.strip()
                            # NOTE(review): the nesting of this if/else was reconstructed from collapsed source;
                            # confirm that the 'else' (seed de-duplication) belongs to the 'true_samples' check.
                            # Only consider samples derived from correctly translated seeds
                            if 'true_samples' not in path:
                                if 'attractors' not in path:
                                    if sample[-1][0] != 'not_flipped':
                                        continue
                                # Skip samples obtained through disregarded generation strategies
                                if len(generation_strategies) > 0:
                                    if sample[-2][-1] not in generation_strategies:
                                        continue
                            else:
                                # Count every (sentence, term, sample[3]) combination only once
                                seen_key = (seed_sentence, term, sample[3])
                                if seen_seeds.get(seen_key, None) is not None:
                                    continue
                                seen_seeds[seen_key] = True

                            # Compute sentence-level scores
                            # Items 0 / 1 / 2 of a score list feed the FREQ / PMI / PPMI metrics below, item 3 is
                            # stored as the seed sentence length, and the last item is the updated parse cache
                            seed_scores = seed_score_cache.get((seed_sentence, term, seed_cluster), None)
                            if seed_scores is None:
                                seed_scores = _get_sentence_scores(seed_sentence,
                                                                   seed_parses,
                                                                   seed_attractor_token_freq_dict,
                                                                   seed_attractor_token_pmi_dict)
                                seed_score_cache[(seed_sentence, term, seed_cluster)] = seed_scores[:-1]
                                seed_parses = seed_scores[-1]

                            if 'true_samples' not in path:
                                adv_scores = seed_score_cache.get((seed_sentence, term, adv_cluster), None)
                                if adv_scores is None:
                                    adv_scores = _get_sentence_scores(seed_sentence,
                                                                      seed_parses,
                                                                      adv_attractor_token_freq_dict,
                                                                      adv_attractor_token_pmi_dict)
                                    seed_score_cache[(seed_sentence, term, adv_cluster)] = adv_scores[:-1]
                                    seed_parses = adv_scores[-1]
                            else:
                                adv_scores = None
                                adv_freq_scores = [[0.], [0.], [0.], [0.], [0.]]
                                adv_pmi_scores = [[0.], [0.], [0.], [0.], [0.]]
                                adv_ppmi_scores = [[0.], [0.], [0.], [0.], [0.]]
                                # Iterate over sense clusters consistent with the mistranslation
                                for ac in attractors_table[term].keys():
                                    if ac == seed_cluster:
                                        continue
                                    # Compute sentence-level scores for the relevant cluster
                                    adv_sorted_attractor_freq = \
                                        attractors_table[term][ac]['[SORTED ATTRACTORS BY FREQ]']
                                    adv_sorted_attractor_pmi = \
                                        attractors_table[term][ac]['[SORTED ATTRACTORS BY PMI]']
                                    adv_attractor_token_freq_dict = {attr_tpl[0]: attr_tpl[1]
                                                                     for attr_tpl in adv_sorted_attractor_freq}
                                    adv_attractor_token_pmi_dict = {attr_tpl[0]: attr_tpl[1]
                                                                    for attr_tpl in adv_sorted_attractor_pmi}
                                    ac_scores = _get_sentence_scores(seed_sentence,
                                                                     seed_parses,
                                                                     adv_attractor_token_freq_dict,
                                                                     adv_attractor_token_pmi_dict)
                                    seed_score_cache[(seed_sentence, term, ac)] = ac_scores[:-1]
                                    seed_parses = ac_scores[-1]
                                    # Pick the cluster corresponding to the highest FREQ / PPMI score
                                    if sent_fun(ac_scores[0]) > sent_fun(adv_freq_scores[0]):
                                        adv_freq_scores = ac_scores[:-1]
                                    if sent_fun(ac_scores[1]) > sent_fun(adv_pmi_scores[1]):
                                        adv_pmi_scores = ac_scores[:-1]
                                    if sent_fun(ac_scores[2]) > sent_fun(adv_ppmi_scores[2]):
                                        adv_ppmi_scores = ac_scores[:-1]

                            # Extend score tables
                            if len(seed_scores[0]) > 0:
                                seed_freq = sent_fun(seed_scores[0])
                                seed_ppmi = sent_fun(seed_scores[2])
                                scores['[SENT. SEED ATTRACTOR TOKEN PMI]'] \
                                    .append(sent_fun(seed_scores[1]) / seed_scores[3])
                            else:
                                seed_freq = 0
                                seed_ppmi = 0
                                scores['[SENT. SEED ATTRACTOR TOKEN PMI]'].append(pmi_lower_bound)
                            scores['[SENT. SEED ATTRACTOR TOKEN FREQ]'].append(seed_freq / seed_scores[3])
                            scores['[SENT. SEED ATTRACTOR TOKEN PPMI]'].append(seed_ppmi / seed_scores[3])

                            if adv_scores is not None:
                                if len(adv_scores[0]) > 0:
                                    adv_freq = sent_fun(adv_scores[0])
                                    adv_ppmi = sent_fun(adv_scores[2])
                                    scores['[SENT. ADV ATTRACTOR TOKEN PMI]'] \
                                        .append(sent_fun(adv_scores[1]) / seed_scores[3])
                                else:
                                    adv_freq = 0
                                    adv_ppmi = 0
                                    scores['[SENT. ADV ATTRACTOR TOKEN PMI]'].append(pmi_lower_bound)
                            else:
                                if len(adv_freq_scores[0]) > 0:
                                    adv_freq = sent_fun(adv_freq_scores[0])
                                else:
                                    adv_freq = 0
                                if len(adv_pmi_scores[1]) > 0:
                                    scores['[SENT. ADV ATTRACTOR TOKEN PMI]'] \
                                        .append(sent_fun(adv_pmi_scores[1]) / seed_scores[3])
                                else:
                                    scores['[SENT. ADV ATTRACTOR TOKEN PMI]'].append(pmi_lower_bound)
                                if len(adv_ppmi_scores[2]) > 0:
                                    adv_ppmi = sent_fun(adv_ppmi_scores[2])
                                else:
                                    adv_ppmi = 0
                            scores['[SENT. ADV ATTRACTOR TOKEN FREQ]'].append(adv_freq / seed_scores[3])
                            scores['[SENT. ADV ATTRACTOR TOKEN PPMI]'].append(adv_ppmi / seed_scores[3])

                            scores['[SENT. ADV-SEED ATTRACTOR TOKEN FREQ DIFF]']\
                                .append(scores['[SENT. ADV ATTRACTOR TOKEN FREQ]'][-1] -
                                        scores['[SENT. SEED ATTRACTOR TOKEN FREQ]'][-1])
                            scores['[SENT. ADV-SEED ATTRACTOR TOKEN PMI DIFF]']\
                                .append(scores['[SENT. ADV ATTRACTOR TOKEN PMI]'][-1] -
                                        scores['[SENT. SEED ATTRACTOR TOKEN PMI]'][-1])
                            scores['[SENT. ADV-SEED ATTRACTOR TOKEN PPMI DIFF]'] \
                                .append(scores['[SENT. ADV ATTRACTOR TOKEN PPMI]'][-1] -
                                        scores['[SENT. SEED ATTRACTOR TOKEN PPMI]'][-1])

                            scores['[SEED SENTENCE LENGTH]'].append(seed_scores[3])

    # Calculate correlation values
    correlation_values = dict()
    print('Computing correlations ...')
    for metric_key in metrics:
        print('Metric: {:s}'.format(metric_key))
        correlation_values[metric_key] = dict()
        positive_metric_scores = positive_scores[metric_key]
        negative_metric_scores = negative_scores[metric_key]
        # Perform the Mann–Whitney U test
        # The sidedness is passed positionally: the keyword is `tail` in old pingouin releases and `alternative`
        # in newer ones, so only the positional form is accepted by both
        mwu_df = mwu(negative_metric_scores, positive_metric_scores, 'two-sided')
        mwu_df_rev = mwu(positive_metric_scores, negative_metric_scores, 'two-sided')
        correlation_values[metric_key]['MWU'] = mwu_df
        correlation_values[metric_key]['MWU_rev'] = mwu_df_rev
        # Add mean (addition indication of the effect size)
        correlation_values[metric_key]['MEANS'] = (np.mean(positive_metric_scores),
                                                   np.mean(negative_metric_scores),
                                                   np.mean(positive_metric_scores) - np.mean(negative_metric_scores))

    # Report results
    # Compute threshold for effect size interpretation
    num_pos = len(positive_scores['[SEED SENTENCE LENGTH]'])
    num_neg = len(negative_scores['[SEED SENTENCE LENGTH]'])
    base_pos = num_pos / (num_pos + num_neg)
    base_neg = num_neg / (num_pos + num_neg)
    small_threshold = 0.2 / np.sqrt(0.2 ** 2 + (1 / (base_pos * base_neg)))
    moderate_threshold = 0.5 / np.sqrt(0.5 ** 2 + (1 / (base_pos * base_neg)))
    max_threshold = 0.8 / np.sqrt(0.8 ** 2 + (1 / (base_pos * base_neg)))

    print('-' * 20)
    print('RESULTS: ')
    for metric_key in metrics:
        print(metric_key)
        for measure in ['MWU', 'MEANS']:
            if measure == 'MEANS':
                values = list()
                for v in correlation_values[metric_key][measure]:
                    values.append(float('{:.4f}'.format(v)))
                print(measure, ' ', values)
            else:
                u = correlation_values[metric_key][measure].iloc[0]['U-val']
                u_rev = correlation_values[metric_key]['MWU_rev'].iloc[0]['U-val']
                p = correlation_values[metric_key][measure].iloc[0]['p-val']
                # Report p-values that would print as 0.0000 as exactly zero
                p = p if p > 0.00005 else 0.0
                rbc = correlation_values[metric_key][measure].iloc[0]['RBC']
                # cles = correlation_values[metric_key][measure].iloc[0]['CLES']
                aw = ((num_pos * num_neg) - u) / (num_pos * num_neg)
                aw_rev = ((num_pos * num_neg) - u_rev) / (num_pos * num_neg)
                print('MWU (u / p) : {:.3f}, {:.4f}'.format(u, p))
                print('MWU (rbc) : {:.3f}'.format(rbc))
                print('MWU (Aw) : {:.3f} | {:.3f}'.format(aw, aw_rev))
        print('-' * 10)
    print('Thresholds: {:.4f} | {:.4f} | {:.4f}'.format(small_threshold, moderate_threshold, max_threshold))
def analyse(survey_id):
    """Show the statistical-test form for a survey and run the selected test.

    The survey document is loaded from MongoDB (404 if missing, 403 if it does
    not belong to the current user) and its data file is read into a data
    frame whose columns populate both variable selectors. On a valid
    submission the chosen test is executed and the user is redirected to the
    result page; validation problems are flashed and the form is re-rendered.

    Args:
        survey_id: String form of the survey's ``_id``.

    Returns:
        A rendered ``analysis/analysedata.html`` template, or a redirect to
        ``analysis.chi_goodness`` / ``analysis.result``.
    """
    form = StatisticalTestForm()
    survey = mongo.db.surveys.find_one_or_404({"_id": ObjectId(survey_id)})
    if survey["user"] != current_user._id:
        flash("You do not have access to that page", "danger")
        abort(403)
    df = read_file(survey["fileName"])

    # Populate the select options in the form with all the variables
    for variable in list(df.columns.values):
        form.independent_variable.choices.append((variable, variable))
        form.dependent_variable.choices.append((variable, variable))

    if form.validate_on_submit():
        # Get the dataset, and save the variables in python variables
        independent_variable = form.independent_variable.data
        dependent_variable = form.dependent_variable.data
        # Ensure the user hasn't selected the same variable for both
        if independent_variable == dependent_variable:
            flash("You can't select the same variable for both.", "danger")
            return render_template("analysis/analysedata.html", form=form)

        test = form.test.data
        # If the user selects Chi-Square goodness fit then they are redirected to a separate URL
        if test == "Chi-Square goodness of fit":
            return redirect(
                url_for('analysis.chi_goodness',
                        variable=independent_variable,
                        survey_id=survey_id))

        # The other tests all require a dependent variable
        if dependent_variable == "":
            flash("You must select a dependent variable for this test.", "danger")
            return render_template("analysis/analysedata.html", form=form)

        # NOTE(review): p_value is only bound by the three branches below;
        # presumably the form restricts `test` to these choices - confirm.
        if test == "Kruskall Wallis Test":
            if is_string_dtype(df[dependent_variable]):
                flash(
                    "Dependent Variable '" + dependent_variable + "' is not numeric.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            kruskal_result = kruskal(data=df,
                                     dv=dependent_variable,
                                     between=independent_variable)
            # get the p-value (p-unc) from the kruskal test and convert to 4 decimal places only.
            # The result frame is label-indexed, so positional access goes through .iloc
            # (integer keys on a non-integer index are deprecated in pandas >= 2.1).
            p_value = "%.4f" % kruskal_result["p-unc"].iloc[0]
        # AT THE MOMENT, THIS TEST IS 2 TAILED. MAY WANT TO ADD OPTIONS FOR 1 TAILED TESTS
        elif test == "Mann-Whitney U Test":
            if is_string_dtype(df[dependent_variable]):
                flash(
                    "Dependent Variable '" + dependent_variable + "' is not numeric.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            group_by = df.groupby(independent_variable)
            group_array = [group_by.get_group(x) for x in group_by.groups]
            if len(group_array) > 2:
                flash(
                    "Independent variable '" + independent_variable +
                    "' has too many groups, only 2 allowed for Mann-Whitney U Test.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            if len(group_array) < 2:
                # Previously this case was reported as "too many groups".
                flash(
                    "Independent variable '" + independent_variable +
                    "' has too few groups, exactly 2 required for Mann-Whitney U Test.",
                    "danger")
                return render_template("analysis/analysedata.html", form=form)
            x = group_array[0][dependent_variable].values
            y = group_array[1][dependent_variable].values
            mwu_result = mwu(x, y)
            p_value = "%.4f" % mwu_result['p-val'].values[0]
        elif test == "Chi-Square Test":
            contingency_table = pd.crosstab(df[independent_variable],
                                            df[dependent_variable])
            _, p_value, _, _ = chi2_contingency(contingency_table, correction=False)

        return redirect(
            url_for('analysis.result',
                    survey=survey_id,
                    test=test,
                    p_value=p_value,
                    independent_variable=independent_variable,
                    dependent_variable=dependent_variable))

    return render_template("analysis/analysedata.html", form=form)
# Run the comparison chosen by the user and show the result table.
if param_vs_nonparam == "Parametric tests (Student, Welch)":
    # A significant Levene test (p < 0.05) means unequal variances -> Welch.
    # Cast to a plain bool: pingouin checks `correction is True`, which a
    # numpy bool would not satisfy.
    use_welch = bool(homoscedasticity.loc["levene", "pval"] < 0.05)
    if use_welch:
        test_message = "Welch test results:"
    else:
        test_message = "Student t-test results:"
    st.success(test_message)
    # Force the variant announced above. With the default correction='auto'
    # pingouin picks Welch based on unequal sample sizes, not on Levene, so
    # the banner and the computed test could disagree.
    t = pg.ttest(x1, x2, correction=use_welch)
    st.write(t)
else:
    test_message = "Mann-Whitney test results:"
    st.success(test_message)
    mw = pg.mwu(x1, x2)
    st.write(mw)

# Render the help text from its markdown file as HTML
md = markdown.Markdown()
ipsum_path = Path('Md/student_help.md')
data = ipsum_path.read_text(encoding='utf-8')
html = md.convert(data)
# help_markdown = util.read_markdown_file("help.md")
st.markdown(html, unsafe_allow_html=True)
st.markdown("## ")
st.success("Bar plots with errors are being generated")
fig = plt.figure(figsize=(12, 6))
error = None
# Bare names: in a notebook these only display a value when they are the last
# expression of a cell.
vara_alle
vara_musik
vara_sound
stda_alle
stda_music
stda_sound

# plot a single group
get_ipython().magic(u'matplotlib inline')
plt.plot(mean_w)
plt.xlabel('Zeitpunkte')
plt.ylabel('Cortisol nmol/L')
plt.title("Mittelwerte der Cortisolmessungen in der Sound Gruppe")

# plot two groups against each other
get_ipython().magic(u'matplotlib inline')
fig, ax = plt.subplots()
ax.plot(mean_m, label='musik')
ax.plot(mean_w, label='sound')
plt.xlabel('Zeitpunkte')
plt.ylabel('Cortisol nmol/L')
plt.title("Mittelwerte Cortisol beide Gruppen")
plt.legend()

# Normality check (the German note below reads: "Normally distributed? - No if p is below alpha")
''' Normalvertteilt?-Nein wenn p unter alpha'''
stats.shapiro(mean_w)

# Unpaired t-test between the two groups' means
ttest(mean_w, mean_m, paired =False)

# Non-parametric alternative (the German note below reads: "Mann-Whitney U test, assuming non-parametric")
'''Man Whitney U Test , angenommen nicht parametrisch'''
pg.mwu(mean_w, mean_m)
def test_correlations(positive_samples_path, negative_samples_path, attractors_table_path):
    """ Compares homograph frequency metrics between positive and negative samples and prints significance scores.

    For every retained sample, the number of '[SENTENCE PAIRS]' entries of the attractor table is recorded for the
    whole term (total), for the seed sense cluster and for the adversarial sense cluster, together with the
    adversarial-minus-seed difference. Each metric is then compared between the positive and the negative table with
    a two-sided Mann-Whitney U test (in both argument orders); U, p, RBC, the derived Aw values, the group means and
    the effect-size thresholds are printed. Nothing is returned.

    :param positive_samples_path: path to the JSON table of positive samples
    :param negative_samples_path: path to the JSON table of negative samples
    :param attractors_table_path: path to the JSON attractor table (term -> sense cluster -> entries)

    The path strings themselves steer the filtering: the substrings 'true_samples', 'attractors' and
    'bad_translations' are checked below.
    """

    # Read-in tables
    print('Reading-in tables ...')
    with open(positive_samples_path, 'r', encoding='utf8') as psp:
        positive_samples = json.load(psp)
    with open(negative_samples_path, 'r', encoding='utf8') as nsp:
        negative_samples = json.load(nsp)

    # Read-in attractor table
    print('Reading-in attractor table ...')
    with open(attractors_table_path, 'r', encoding='utf8') as atp:
        attractors_table = json.load(atp)

    # Declare attractor importance metrics to consider
    metrics = [
        '[HOMOGRAPH TOTAL FREQUENCY]',
        '[HOMOGRAPH SEED FREQUENCY]',
        '[HOMOGRAPH ADV FREQUENCY]',
        '[HOMOGRAPH FREQ DIFF]'
    ]

    # Collect scores
    print('Looking up scores ...')
    positive_scores = {m: list() for m in metrics}
    negative_scores = {m: list() for m in metrics}

    # Both tables are processed by the same logic (positives first, then negatives)
    for samples, scores, path in [(positive_samples, positive_scores, positive_samples_path),
                                  (negative_samples, negative_scores, negative_samples_path)]:
        for term in samples.keys():
            total_attractor_freq = sum([
                len(attractors_table[term][sense_cluster]['[SENTENCE PAIRS]'])
                for sense_cluster in attractors_table[term].keys()
            ])
            for seed_cluster in samples[term].keys():
                seed_attractor_freq = len(attractors_table[term][seed_cluster]['[SENTENCE PAIRS]'])
                for adv_cluster in samples[term][seed_cluster].keys():
                    adv_attractor_freq = len(attractors_table[term][adv_cluster]['[SENTENCE PAIRS]'])
                    for seed_sentence in samples[term][seed_cluster][adv_cluster].keys():
                        for sample in samples[term][seed_cluster][adv_cluster][seed_sentence]:
                            # Only consider samples derived from correctly translated seeds
                            if 'true_samples' not in path:
                                if 'attractors' not in path:
                                    if sample[-1][0] != 'not_flipped':
                                        continue
                            scores['[HOMOGRAPH TOTAL FREQUENCY]'].append(total_attractor_freq)
                            scores['[HOMOGRAPH SEED FREQUENCY]'].append(seed_attractor_freq)
                            if 'bad_translations' not in path:
                                scores['[HOMOGRAPH ADV FREQUENCY]'].append(adv_attractor_freq)
                            else:
                                # Use the most frequent of the clusters listed in sample[2]
                                translation_clusters = list(set(sample[2]))
                                tc_frequencies = list()
                                for tc in translation_clusters:
                                    # Needed to skip clusters for which no attractors are known
                                    if attractors_table[term].get(tc, None) is None:
                                        continue
                                    tc_frequencies.append(len(attractors_table[term][tc]['[SENTENCE PAIRS]']))
                                # NOTE(review): raises ValueError if none of the clusters is in the table
                                adv_attractor_freq = max(tc_frequencies)
                                scores['[HOMOGRAPH ADV FREQUENCY]'].append(adv_attractor_freq)
                            scores['[HOMOGRAPH FREQ DIFF]'].append(adv_attractor_freq - seed_attractor_freq)

    # Calculate correlation values
    correlation_values = dict()
    print('Computing correlations ...')
    for metric_key in metrics:
        correlation_values[metric_key] = dict()
        positive_metric_scores = positive_scores[metric_key]
        negative_metric_scores = negative_scores[metric_key]
        # Perform the Mann–Whitney U test
        # The sidedness is passed positionally: the keyword is `tail` in old pingouin releases and `alternative`
        # in newer ones, so only the positional form is accepted by both
        mwu_df = mwu(negative_metric_scores, positive_metric_scores, 'two-sided')
        mwu_df_rev = mwu(positive_metric_scores, negative_metric_scores, 'two-sided')
        correlation_values[metric_key]['MWU'] = mwu_df
        correlation_values[metric_key]['MWU_rev'] = mwu_df_rev
        # Add mean (addition indication of the effect size)
        correlation_values[metric_key]['MEANS'] = (
            np.mean(positive_metric_scores),
            np.mean(negative_metric_scores),
            np.mean(positive_metric_scores) - np.mean(negative_metric_scores))

    # Report results
    # Compute threshold for effect size interpretation
    num_pos = len(positive_scores['[HOMOGRAPH TOTAL FREQUENCY]'])
    num_neg = len(negative_scores['[HOMOGRAPH TOTAL FREQUENCY]'])
    base_pos = num_pos / (num_pos + num_neg)
    base_neg = num_neg / (num_pos + num_neg)
    small_threshold = 0.2 / np.sqrt(0.2**2 + (1 / (base_pos * base_neg)))
    moderate_threshold = 0.5 / np.sqrt(0.5**2 + (1 / (base_pos * base_neg)))
    max_threshold = 0.8 / np.sqrt(0.8**2 + (1 / (base_pos * base_neg)))

    print('-' * 20)
    print('RESULTS: ')
    for metric_key in metrics:
        print(metric_key)
        for measure in ['MWU', 'MEANS']:
            if measure == 'MEANS':
                values = list()
                for v in correlation_values[metric_key][measure]:
                    values.append(float('{:.4f}'.format(v)))
                print(measure, ' ', values)
            else:
                u = correlation_values[metric_key][measure].iloc[0]['U-val']
                u_rev = correlation_values[metric_key]['MWU_rev'].iloc[0]['U-val']
                p = correlation_values[metric_key][measure].iloc[0]['p-val']
                # Report p-values that would print as 0.0000 as exactly zero
                p = p if p > 0.00005 else 0.0
                rbc = correlation_values[metric_key][measure].iloc[0]['RBC']
                # cles = correlation_values[metric_key][measure].iloc[0]['CLES']
                aw = ((num_pos * num_neg) - u) / (num_pos * num_neg)
                aw_rev = ((num_pos * num_neg) - u_rev) / (num_pos * num_neg)
                print('MWU (u / p) : {:.3f}, {:.4f}'.format(u, p))
                print('MWU (rbc) : {:.3f}'.format(rbc))
                print('MWU (Aw) : {:.3f} | {:.3f}'.format(aw, aw_rev))
        print('-' * 10)
    print('Thresholds: {:.4f} | {:.4f} | {:.4f}'.format(
        small_threshold, moderate_threshold, max_threshold))
plt.ylabel('Count')
# Force integer tick labels on the y axis
plt.gca().get_yaxis().set_major_locator(ticker.MaxNLocator(integer=True))
plt.legend()
plt.show()


# In[ ]:


# Two-sided Mann-Whitney U Test
from pingouin import mwu

# Per-row total over columns 4..16 of the pre and post score tables
pre_score_sum = pre_scores.iloc[:, 4:17].sum(axis=1)
post_score_sum = post_scores.iloc[:, 4:17].sum(axis=1)

# Comparing Pre and Post tests including both learning styles
print('Pre and Post tests including both learning styles')
# The sidedness is passed positionally: the keyword is `tail` in old pingouin
# releases and `alternative` in newer ones, so only the positional form is
# accepted by both.
mwu(post_score_sum, pre_score_sum, 'two-sided')
#ref: https://pingouin-stats.org/generated/pingouin.mwu.html#rf5915ba8ddc9-2
#ref: https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test


# In[ ]:


# Two-sided Mann-Whitney U Test
# Comparing Pre and Post tests in terms of text-based learning
print('Pre and Post tests in terms of text-based learning')
mwu(post_score_sum_t, pre_score_sum_t, 'two-sided')


# In[ ]:


# Two-sided Mann-Whitney U Test
# Comparing Pre and Post tests in terms of video-based learning
def qualOrdinalUnpaired(imgDir, sheetName, sheetDf, sheetScale, silent=False):
    """Compare the columns of an ordinal sheet and hand the results to the plotter.

    Builds a contingency table (scale value x column) from ``sheetDf``, runs a
    Kruskal-Wallis test across all columns when there are more than two, runs
    a two-sided Mann-Whitney U test for every pair of columns, Holm-corrects
    the pairwise p-values, and passes data, contingency table and statistics
    to ``StackedBarPlotter``.

    Args:
        imgDir: Directory of the output image; the file is
            ``<imgDir>/<sheetName>.png``.
        sheetName: Name of the sheet, used as title and file name.
        sheetDf: DataFrame with one column per compared modality.
        sheetScale: Number of scale steps; steps ``0 .. sheetScale - 1``
            missing from the data are added to the contingency table with
            zero counts.
        silent: When True, the header line is not printed.
    """
    # Local import keeps the block self-contained (stdlib only).
    from itertools import combinations

    if not silent:
        print("######################################## ", sheetName,
              " ########################################")

    meltedSheetDf = sheetDf.melt(var_name='Factor', value_name='Variable')
    contingencySheetDf = pd.crosstab(index=meltedSheetDf['Variable'],
                                     columns=meltedSheetDf['Factor'])

    # fill empty scale value
    for sheetStep in range(sheetScale):
        if sheetStep not in contingencySheetDf.index.values:
            contingencySheetDf.loc[sheetStep] = [0] * len(contingencySheetDf.columns)
    contingencySheetDf.sort_index(inplace=True)

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    statColumns = ['COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE']
    statRows = []

    # ALL MODALITY
    if len(contingencySheetDf.columns) > 2:
        sheetDf_long = sheetDf.melt(ignore_index=False).reset_index()
        kruskal_stats = pg.kruskal(data=sheetDf_long,
                                   dv="value",
                                   between="variable")
        source, ddof1, hvalue, pvalue = kruskal_stats.values[0]
        statRows.append({
            'COMPARISON': 'ALL',
            'TEST': "Kruskal-Wallis",
            'STATISTICS': hvalue,
            'P-VALUE': pvalue,
            'EFFECT SIZE': -1
        })

    # BETWEEN MODALITY
    modality_names = sheetDf.columns.values
    # Only the pairwise rows (from this index on) are Holm-corrected below
    uncorrectedStatIndex = len(statRows)
    for first_name, second_name in combinations(modality_names, 2):
        stats_mannwhitney = pg.mwu(x=sheetDf.loc[:, first_name],
                                   y=sheetDf.loc[:, second_name],
                                   alternative='two-sided')
        uvalue, _alternative, pvalue, RBC, CLES = stats_mannwhitney.values[0]
        statRows.append({
            'COMPARISON': first_name + '|' + second_name,
            'TEST': "Mann-Whitney",
            'STATISTICS': uvalue,
            'P-VALUE': pvalue,
            'EFFECT SIZE': RBC
        })

    # dtype=object mirrors the frame the row-by-row append used to produce
    statDf = pd.DataFrame(statRows, columns=statColumns, dtype=object)

    reject, statDf.loc[uncorrectedStatIndex::, 'P-VALUE'] = pg.multicomp(
        statDf.loc[uncorrectedStatIndex::, 'P-VALUE'].values,
        alpha=0.05,
        method="holm")

    StackedBarPlotter.StackedBarPlotter(filename=imgDir + '/' + sheetName + '.png',
                                        title=sheetName,
                                        dataDf=sheetDf,
                                        histDf=contingencySheetDf,
                                        statDf=statDf)