def mcnemar_test(p1, p2):
    """Compare two paired boolean outcome sequences with McNemar's test.

    A 2x2 table is filled with row/column 0 for ``True`` and 1 for ``False``
    (rows follow ``p1``, columns follow ``p2``). Pairs in which either value
    compares equal to neither ``True`` nor ``False`` are left out of the table.

    Returns:
        A ``(statistic, pvalue, decision)`` tuple where ``decision`` is
        ``'fail'`` (fail to reject H0) or ``'reject'``.
    """

    def _slot(flag):
        # 0 for True, 1 for False, None for anything else (pair is skipped).
        if flag == True:  # noqa: E712 - equality kept on purpose
            return 0
        if flag == False:  # noqa: E712
            return 1
        return None

    counts = [[0, 0], [0, 0]]
    for idx, first in enumerate(p1):
        row = _slot(first)
        col = _slot(p2[idx])
        if row is not None and col is not None:
            counts[row][col] += 1
    print("Contingency table: {}".format(counts))

    # The exact (binomial) variant is used as soon as any cell is below 25.
    smallest_cell = min(min(row) for row in counts)
    if smallest_cell < 25:
        print("Some value < 25. Calculating exact p-value")
        result = sm.mcnemar(counts, exact=True)
    else:
        print("All values >= 25. Calculating standard McNemar's statistic")
        result = sm.mcnemar(counts, exact=False, correction=True)
    print('statistic=%.3f, p-value=%.5f' % (result.statistic, result.pvalue))

    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
        decision = 'fail'
    else:
        print('Different proportions of errors (reject H0)')
        decision = 'reject'
    return result.statistic, result.pvalue, decision
def mcnemarTest(modelA, modelB, testData, testLabels, confidence=0.95):
    """Run McNemar's test on the per-sample correctness of two models.

    Both models predict on ``testData``; the arg-max of each prediction row is
    compared with the arg-max of ``testLabels`` to decide whether a sample was
    classified correctly. The correctness vectors are cross-tabulated
    (index 0 = wrong, index 1 = correct) and tested. The chi-square variant
    with continuity correction is used only when both discordant cells exceed
    25; otherwise the exact binomial variant is used.

    Args:
        modelA, modelB: objects exposing ``predict(testData)``.
        testData: inputs forwarded to both ``predict`` calls.
        testLabels: 2-D label array, reduced with ``argmax`` along axis 1.
        confidence: confidence level; significance threshold is
            ``1 - confidence``.

    Returns:
        None. The table, p-value and verdict are printed.
    """
    # NOTE (from the original author): not complete - the output of the
    # mcnemar function still needs to be understood.
    from statsmodels.stats.contingency_tables import mcnemar
    from sklearn.metrics import confusion_matrix

    # Per-sample correctness of each model.
    predictA = np.argmax(modelA.predict(testData), axis=1)
    predictB = np.argmax(modelB.predict(testData), axis=1)
    actual = np.argmax(testLabels, axis=1)
    resultsA = predictA == actual
    resultsB = predictB == actual

    # Pin the labels so the matrix is always 2x2, even when a model is right
    # (or wrong) on every sample; otherwise the [0, 1] lookup below fails.
    confMatrix = confusion_matrix(resultsA, resultsB, labels=[False, True])

    # Only the discordant cells decide which variant of the test applies.
    standardTest = bool(confMatrix[0, 1] > 25 and confMatrix[1, 0] > 25)
    if standardTest:
        stats = mcnemar(confMatrix, exact=False, correction=True)
    else:
        stats = mcnemar(confMatrix, exact=True)
    pval = stats.pvalue

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("### McNemar Test ###")
    print(confMatrix)
    print("Pvalue: {}".format(pval))
    if pval < (1 - confidence):
        print("Significant difference (reject null hypothesis)")
    else:
        print("No significant difference (accept null hypothesis)")
def test_mannwhithney(predfile1, predfile2, testfile, testfile2):
    """Print McNemar comparisons of two prediction files.

    Each prediction file is scored against its test file with ``evaluate``,
    which yields four sequences (true/predicted labels and their "prec"
    counterparts). The function prints a preview of both prediction lists, the
    number of positions where the two gold label sequences differ, and McNemar
    results for the plain and the "prec" label sets.

    Returns:
        None. Everything is printed.
    """
    y_true1, y_pred1, y_true_prec1, y_pred_prec1 = evaluate(
        testfile, predfile1)
    y_true2, y_pred2, y_true_prec2, y_pred_prec2 = evaluate(
        testfile2, predfile2)

    print('\n First model: ', predfile1)
    print('Ex: ', y_pred1[:10], ' Len: ', len(y_pred1))
    print('Second model: ', predfile2)
    print('Ex: ', y_pred2[:10], ' Len: ', len(y_pred2))

    # Count positions where the gold labels disagree (0 means identical test
    # sets). The previous `i is False` identity check never matched numpy
    # booleans, so it reported 0 no matter what.
    mismatches = int(
        np.count_nonzero(np.not_equal(np.array(y_true1), np.array(y_true2))))
    print('Is testset the same? ', mismatches)

    mc_tb = mcnemar_table(y_target=np.array(y_true1),
                          y_model1=np.array(y_pred1),
                          y_model2=np.array(y_pred2))
    print('Contingency table: ', mc_tb)
    mcnemar_res = mcnemar(mc_tb)
    print('McNemar: p value: {:.20f}'.format(mcnemar_res.pvalue))
    chi2, p = mlx_mcnemar(ary=mc_tb, corrected=True)
    print('McNemar: chi:{:.4f} p value: {}'.format(chi2, p))

    mc_tb_prec = mcnemar_table(y_target=np.array(y_true_prec1),
                               y_model1=np.array(y_pred_prec1),
                               y_model2=np.array(y_pred_prec2))
    mcnemar_res_prec = mcnemar(mc_tb_prec)
    print('McNemar PRECISION: p value: {}'.format(mcnemar_res_prec.pvalue))
def Comparing_classifiers(this: ClassificationReport,
                          that: ClassificationReport):
    """Print a McNemar comparison of two classifiers' test predictions.

    The predictions of ``this`` and ``that`` are cross-tabulated with
    ``confusion_matrix``. When the matrix has more than one row, McNemar's
    chi-square test is run on it and judged at ``alpha = 0.01``; otherwise the
    classifiers are reported as equivalent.

    Args:
        this, that: reports exposing ``classifier`` and ``y_test_pred``.

    Returns:
        None. All output is printed.
    """
    this_that_conf_mat = confusion_matrix(this.y_test_pred, that.y_test_pred)
    print()
    print(120 * '-')
    print(f'Comparing {this.classifier} with {that.classifier}')
    print(60 * '-', 'Confusion matrix', 60 * '-')
    print(this_that_conf_mat)
    # The hypothesis is stated once, up front; it used to be printed a second
    # time inside the branch below.
    print(
        f'Null Hypothesis H0 = Both {this.classifier} and {that.classifier} have predictions that are similar and make errors in much the same proportion'
    )
    if this_that_conf_mat.shape[0] > 1:
        result = mcnemar(this_that_conf_mat, exact=False)
        print('statistic=%.3f, p-value=%.3f' %
              (result.statistic, result.pvalue))
        alpha = 0.01
        if result.pvalue > alpha:
            print(
                f'Since P Value is greater than {alpha}, thus Same proportions of errors (fail to reject H0)'
            )
        else:
            print(
                f'Since P Value is less than {alpha}, thus Different proportions of errors (reject H0)'
            )
    else:
        # A single-row matrix means both classifiers emitted one shared label.
        print('Confusion matrix cannot be created as they produce same result')
        print('Same proportions of errors (fail to reject H0)')
    print(120 * '-')
def mcnemar_test(in1, in2):
    """Return the McNemar p-value for two sets of per-item results.

    ``contingency_stats`` turns each input into a mapping from item key to a
    0/1 outcome. The outcomes are paired by the keys of the first mapping and
    tallied into ``[[yes_yes, yes_no], [no_yes, no_no]]``, which is tested
    with the chi-square variant and continuity correction.

    Returns:
        The p-value of the test.
    """
    yes_yes = 0
    yes_no = 0
    no_yes = 0
    no_no = 0

    # Get individual predictions, marked as correct (1) or not (0).
    results1 = contingency_stats(in1)
    results2 = contingency_stats(in2)
    for key, res1 in results1.items():
        res2 = results2[key]
        if res1 + res2 == 2:
            yes_yes += 1
        elif res1 + res2 == 0:
            no_no += 1
        elif res1 == 1 and res2 == 0:
            yes_no += 1
        elif res1 == 0 and res2 == 1:
            no_yes += 1

    # The table must reach the test intact; it used to be overwritten with an
    # empty list right here.
    table = [[yes_yes, yes_no], [no_yes, no_no]]
    result = mcnemar(table, exact=False, correction=True)

    # NOTE(review): which `mcnemar` is imported is not visible from here.
    # Result objects exposing `.pvalue` are read by attribute (tuple-unpacking
    # a dict-like result would yield key names); plain tuples are unpacked.
    if hasattr(result, 'pvalue'):
        return result.pvalue
    _stat, pval = result
    return pval
def p_value_mcNemar(y_test, y_pred1, y_pred2):
    """Tabulate, plot and test the paired correctness of two classifiers.

    Each sample lands in a 2x2 table: row 0/1 when classifier 1 is
    correct/incorrect, column 0/1 likewise for classifier 2. The table is
    printed, shown as a heatmap, and passed to McNemar's chi-square test with
    continuity correction.

    Args:
        y_test: true labels.
        y_pred1, y_pred2: predictions, indexable by position in ``y_test``.

    Returns:
        The p-value of the test. Earlier versions returned nothing, so callers
        that ignore the result are unaffected.
    """
    ct = np.zeros((2, 2))
    for k, y in enumerate(y_test):
        row = 0 if y == y_pred1[k] else 1
        col = 0 if y == y_pred2[k] else 1
        ct[row, col] += 1
    print(ct)

    pd_ct = pd.DataFrame(ct,
                         columns=['C2 correct', 'C2 incorrect'],
                         index=['C1 correct', 'C1 incorrect'])
    plt.figure(figsize=(14, 7))
    plt.title("Contingency table")
    sn.set(font_scale=3.0)  # Adjust to fit
    sn.heatmap(pd_ct, annot=True, fmt='g')
    plt.show()
    print(pd_ct)

    result = mcnemar(table=ct, exact=False, correction=True)
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
    return result.pvalue
def run_mcnemar(baseline_pred, experiment_pred, y_test):
    """Run McNemar's significance test for a baseline against an experiment.

    Samples are tallied into ``[[a, b], [c, d]]``: ``a`` both correct, ``b``
    baseline correct only, ``c`` experiment correct only, ``d`` both wrong.
    The chi-square variant without continuity correction is applied and the
    verdict at ``alpha = 0.05`` is printed.

    Returns:
        The result object produced by ``mcnemar``.
    """
    # Row index follows the baseline, column index the experiment;
    # index 0 means "correct", index 1 means "incorrect".
    table = [[0, 0], [0, 0]]
    for b_pred, ex_pred, true in zip(baseline_pred, experiment_pred, y_test):
        row = 0 if b_pred == true else 1
        col = 0 if ex_pred == true else 1
        table[row][col] += 1

    result = mcnemar(table, exact=False, correction=False)

    alpha = 0.05
    verdict = ('Same proportions of errors (fail to reject H0)'
               if result.pvalue > alpha else
               'Different proportions of errors (reject H0)')
    print(verdict)
    return result
def core(tsx, tsy):
    """Test the symmetry of the cross-tabulation of two categorical series.

    A 2x2 table is tested with McNemar's test; any other shape is tested with
    Bowker's symmetry test. The result is a table of ``"observed (expected)"``
    strings with row/column totals, indexed by test method, chi-square
    statistic, p-value and category.

    Args:
        tsx: categorical (nominal) data, used for the rows.
        tsy: categorical (nominal) data, used for the columns.

    Returns:
        A DataFrame with a four-level index as described above.
    """
    crosstab = pd.crosstab(tsx, tsy)
    crosstab2 = pd.crosstab(tsx, tsy, margins=True)
    crosstab2 = crosstab2.rename(columns={'All': '总计'}, index={'All': '总计'})

    if crosstab.shape == (2, 2):
        res = contingency_tables.mcnemar(crosstab)
        method = 'mcnemar'
    else:
        res = contingency_tables.SquareTable(crosstab).symmetry(
            method="bowker")
        method = 'bowker'
    chi2 = res.statistic
    p = res.pvalue

    expected = stats.contingency.expected_freq(crosstab)
    # Label the expected counts with the crosstab's own axes. unique() returns
    # first-appearance order while crosstab sorts its labels, so using it here
    # attached expected values to the wrong categories before the
    # label-aligned string concatenation below.
    dfe = pd.DataFrame(expected,
                       columns=crosstab.columns,
                       index=crosstab.index).round(3)
    dfte = crosstab.astype(str) + ' (' + dfe.astype(str) + ')'
    dfte['总计'] = crosstab2['总计']
    dfte.loc['总计'] = crosstab2.loc['总计']
    dfte['检验方法'] = method
    dfte['卡方统计量'] = chi2
    dfte['p-值'] = p
    dfte.index.name = '类别'
    return dfte.reset_index().set_index(['检验方法', '卡方统计量', 'p-值', '类别'])
def compute_power(prob_table, dataset_size, alpha=0.05, r=5000):
    """Estimate by simulation the power of McNemar's test for a given effect.

    Method from Dallas Card et al., "With Little Power Comes Great
    Responsibility":
    https://colab.research.google.com/drive/1anaS-9ElouZhUgCAYQt8jy8qBiaXnnK1?usp=sharing#scrollTo=OCz-VAm_ifqZ

    Args:
        prob_table: 2x2 array of cell probabilities.
        dataset_size: number of samples drawn per simulated dataset.
        alpha: significance threshold.
        r: number of simulations.

    Returns:
        ``(power, mean_effect, type_m, type_s)``: the share of simulations that
        are significant with the correct sign, the mean simulated difference,
        and the mean magnitude ratio and sign-error rate among significant
        simulations.

    Raises:
        RuntimeError: if the two off-diagonal probabilities are equal.
    """
    if prob_table[0, 1] == prob_table[1, 0]:
        raise RuntimeError("Power is undefined when the true effect is zero.")

    cell_probs = prob_table.reshape((4, ))
    pvals = []
    diffs = []
    for _ in trange(r):  # one iteration per simulated dataset
        sample = np.random.multinomial(n=dataset_size,
                                       pvals=cell_probs).reshape((2, 2))
        diffs.append((sample[0, 1] - sample[1, 0]) / dataset_size)
        pvals.append(mcnemar(sample).pvalue)

    true_diff = prob_table[0, 1] - prob_table[1, 0]
    true_sign = np.sign(true_diff)

    # Differences from the simulations that reached significance.
    sig_diffs = [diff for diff, pval in zip(diffs, pvals) if pval <= alpha]
    right_sign = sum(1 for diff in sig_diffs if np.sign(diff) == true_sign)

    power = right_sign / r
    mean_effect = np.mean(diffs)
    type_m = np.mean(np.abs(sig_diffs) / np.abs(true_diff))
    type_s = np.mean(np.sign(sig_diffs) != true_sign)
    return power, mean_effect, type_m, type_s
def binomial_paired_mcnemartest(data_bin_1, data_bin_2, alpha=0.05 / 4):
    """Test whether two paired binary outcome vectors differ (McNemar).

    Follows https://machinelearningmastery.com/mcnemars-test-for-machine-learning/
    The inputs are assumed to be ordered identically (by test index), so that
    position ``i`` in both refers to the same test case.

    Args:
        data_bin_1, data_bin_2: paired boolean-like sequences (truthy = success).
        alpha: significance threshold. The default ``0.05 / 4`` is the value
            that used to be hard-coded.

    Returns:
        ``(significant, pvalue)``. When there are no discordant pairs the test
        is skipped and ``(False, 1)`` is returned.
    """
    failed_1 = np.logical_not(data_bin_1)
    failed_2 = np.logical_not(data_bin_2)

    # Contingency table cells.
    success_1_success_2 = np.count_nonzero(
        np.logical_and(data_bin_1, data_bin_2))
    success_1_failed_2 = np.count_nonzero(
        np.logical_and(data_bin_1, failed_2))
    failed_1_success_2 = np.count_nonzero(
        np.logical_and(failed_1, data_bin_2))
    failed_1_failed_2 = np.count_nonzero(np.logical_and(failed_1, failed_2))

    # Without discordant pairs the test is meaningless and the mcnemar
    # function would emit a warning.
    if (success_1_failed_2 + failed_1_success_2) == 0:
        return False, 1

    contingency_table = [[success_1_success_2, success_1_failed_2],
                         [failed_1_success_2, failed_1_failed_2]]
    result = mcnemar(contingency_table, exact=False)
    return bool(result.pvalue <= alpha), result.pvalue
def main():
    """Accumulate a 2x2 table over cross-validation folds and run McNemar.

    Command-line arguments give the language file and two path templates;
    each template is formatted with the fold index ``0 .. cvn - 1``. Per-fold
    tables from ``eval_mv`` are summed and the chi-square variant of
    McNemar's test is printed for the total.
    """
    parser = ArgumentParser()
    parser.add_argument("-s", "--seed", dest="seed", metavar="INT", type=int,
                        default=None, help="random seed")
    parser.add_argument("--random", dest="random", action="store_true",
                        default=False)
    parser.add_argument("--freq", dest="most_frequent", action="store_true",
                        default=False)
    parser.add_argument("--cvn", metavar="INT", type=int, default=10)
    parser.add_argument("langs", metavar="LANGS", default=None)
    parser.add_argument("f1", metavar="LANGS2 PREFIX", default=None)
    parser.add_argument("f2", metavar="LANGS2 PREFIX", default=None)
    args = parser.parse_args()

    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)

    # Each stream is consumed inside its `with` block so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(args.langs) as langs_file:
        langs = list(load_json_stream(langs_file))

    mat = np.zeros((2, 2), dtype=np.int32)
    for cvi in range(args.cvn):
        fp1 = args.f1.format(cvi)
        fp2 = args.f2.format(cvi)
        sys.stderr.write("processsing {} and {}\n".format(fp1, fp2))
        with open(fp1) as stream1:
            filled_langs1 = list(load_json_stream(stream1))
        with open(fp2) as stream2:
            filled_langs2 = list(load_json_stream(stream2))
        mat += eval_mv(filled_langs1, filled_langs2, langs)

    print(mat)
    bunch = mcnemar(mat, exact=False)
    print("mcnemar\t{}".format(bunch))
def mcnemar_test(data_df, var1, var2):
    """Display a McNemar test of two paired columns of ``data_df``.

    The categories are those observed in ``var1``. A contingency table of
    ``var1`` (rows) against ``var2`` (columns) is built cell by cell and
    displayed, then the statistic and p-value are displayed as a one-row frame
    labelled ``"<var1> vs <var2>"``.

    Returns:
        None. Both frames are shown with ``display``.
    """
    frame = data_df.copy()

    # Categories come from the first variable only.
    levels = frame[var1].value_counts().index.tolist()
    row_index = pd.MultiIndex.from_product([[var1], levels])
    col_index = pd.MultiIndex.from_product([[var2], levels])
    table_df = pd.DataFrame(columns=col_index, index=row_index)

    for row_level in levels:
        in_row = frame[var1] == row_level
        for col_level in levels:
            in_cell = in_row & (frame[var2] == col_level)
            table_df.loc[(var1, row_level),
                         (var2, col_level)] = len(frame.loc[in_cell])
    display(table_df)

    # Run the test on the raw cell counts.
    outcome = mcnemar(table_df.values)
    label = var1 + ' vs ' + var2
    summary_df = pd.DataFrame(columns=['statistic', 'p_value'])
    summary_df.loc[label, 'statistic'] = outcome.statistic
    summary_df.loc[label, 'p_value'] = outcome.pvalue
    display(summary_df)
def make_stats(a1, b1, gd1, n, tag):
    """Compare two prediction sets against gold labels, in and out of sample.

    For every key of ``gd1`` the first ``n`` entries form the train split and
    the rest the out-of-sample split. Each prediction is scored 1.0 when it
    equals the gold label and 0.0 otherwise; scores are pooled over all keys.
    McNemar's test and a one-sided paired t-test (``a`` greater than ``b``)
    are run on each split.

    Args:
        a1, b1: mappings from key to prediction sequence.
        gd1: mapping from key to gold-label sequence (defines the keys).
        n: number of leading items that belong to the train split.
        tag: label copied into every result dict.

    Returns:
        ``(result_st_train, result_st_out, result_mc_train, result_mc_out)``,
        each a dict with keys ``tag``, ``set``, ``sample-count``, ``type``,
        ``pvalue`` and ``statistic``.
    """

    def _score(pred, gold):
        # Element-wise 1.0 / 0.0 correctness indicator.
        return (np.asarray(pred) == np.asarray(gold)).astype(float)

    def _pool(chunks):
        return np.concatenate(chunks) if chunks else np.zeros(0)

    train_a, train_b, out_a, out_b = [], [], [], []
    for key in gd1:
        gold = gd1[key]
        train_a.append(_score(a1[key][:n], gold[:n]))
        train_b.append(_score(b1[key][:n], gold[:n]))
        out_a.append(_score(a1[key][n:], gold[n:]))
        out_b.append(_score(b1[key][n:], gold[n:]))

    # Pool from empty accumulators. Seeding them with np.array(0) used to add
    # a phantom "both wrong" sample to every array, inflating the sample count
    # and shifting the paired t-test.
    al_train = _pool(train_a)
    bl_train = _pool(train_b)
    al_out = _pool(out_a)
    bl_out = _pool(out_b)

    c_out = mcnemar(confusion_matrix(al_out, bl_out, labels=(0, 1)))
    result_mc_out = {'tag': tag, 'set': 'out-of-sample',
                     'sample-count': len(al_out), 'type': 'McNemar',
                     'pvalue': c_out.pvalue, 'statistic': c_out.statistic}
    c_train = mcnemar(confusion_matrix(al_train, bl_train, labels=(0, 1)))
    result_mc_train = {'tag': tag, 'set': 'train',
                       'sample-count': len(al_train), 'type': 'McNemar',
                       'pvalue': c_train.pvalue,
                       'statistic': c_train.statistic}

    stats_out, pvalue_out = ttest_rel(al_out, bl_out, alternative='greater')
    result_st_out = {'tag': tag, 'set': 'out-of-sample',
                     'sample-count': len(al_out), 'type': 'Ttest',
                     'pvalue': pvalue_out, 'statistic': stats_out}
    stats_train, pvalue_train = ttest_rel(al_train, bl_train,
                                          alternative='greater')
    result_st_train = {'tag': tag, 'set': 'train',
                       'sample-count': len(al_train), 'type': 'Ttest',
                       'pvalue': pvalue_train, 'statistic': stats_train}
    return result_st_train, result_st_out, result_mc_train, result_mc_out
def test_mcnemar():
    """Check McNemar results against SquareTable homogeneity and R values."""
    table = tables[0]

    # Chi-square without continuity correction matches the homogeneity test.
    uncorrected = ctab.mcnemar(table, exact=False, correction=False)
    homogeneity = sm.stats.SquareTable(table).homogeneity()
    assert_allclose(uncorrected.statistic, homogeneity.statistic)
    assert_equal(homogeneity.df, 1)

    # Chi-square with continuity correction.
    corrected = ctab.mcnemar(table, exact=False, correction=True)
    assert_allclose(corrected.pvalue, r_results.loc[0, "homog_cont_p"])

    # Binomial reference distribution.
    exact = ctab.mcnemar(table, exact=True)
    assert_allclose(exact.pvalue, r_results.loc[0, "homog_binom_p"])
def calculate_mcnemars_test(hyperpartisan_valid_predictions,
                            joint_valid_predictions):
    """Return the exact McNemar p-value for two sets of validation predictions.

    The contingency table is built by ``create_contingency_table`` from the
    module-level ``hyperpartisan_valid_targets`` and the two prediction sets.
    """
    table = create_contingency_table(hyperpartisan_valid_targets,
                                     hyperpartisan_valid_predictions,
                                     joint_valid_predictions)
    return mcnemar(table, exact=True).pvalue
def test_mcnemar():
    """Compare the three McNemar variants with reference results."""
    first_table = tables[0]

    # Variant 1: chi-square, no continuity correction; must agree with the
    # homogeneity test of the same square table.
    plain = ctab.mcnemar(first_table, exact=False, correction=False)
    reference = sm.stats.SquareTable(first_table).homogeneity()
    assert_allclose(plain.statistic, reference.statistic)
    assert_equal(reference.df, 1)

    # Variant 2: chi-square with continuity correction.
    with_correction = ctab.mcnemar(first_table, exact=False, correction=True)
    assert_allclose(with_correction.pvalue, r_results.loc[0, "homog_cont_p"])

    # Variant 3: exact test on the binomial reference distribution.
    binomial = ctab.mcnemar(first_table, exact=True)
    assert_allclose(binomial.pvalue, r_results.loc[0, "homog_binom_p"])
def main():
    """Evaluate two checkpoints and print a significance test between them.

    Both testers are run in the mode named by their arguments, their records
    are reduced to an average and a per-sample metric list, and the two lists
    are compared with the test selected by ``mode`` (``'ttest'``,
    ``'fisher'`` or ``'mcnemar'``).

    Raises:
        NotImplementedError: if ``mode`` is not one of the three supported
            tests.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    torchaudio.set_audio_backend('sox_io')
    hack_isinstance()

    # get config and arguments
    mode, args1, config1, args2, config2 = get_ttest_args()

    # Fix seed and make backends deterministic
    random.seed(args1.seed)
    np.random.seed(args1.seed)
    torch.manual_seed(args1.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args1.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # NOTE(review): eval() runs a string built from the `mode` argument; if
    # that value can come from an untrusted source this executes arbitrary
    # code. Consider getattr(tester, mode) once dotted modes are ruled out.
    tester1 = Tester(args1, config1)
    records1 = eval(f'tester1.{args1.mode}')()
    average1, sample_metric1 = process_records(records1, args1.evaluate_metric)

    tester2 = Tester(args2, config2)
    records2 = eval(f'tester2.{args2.mode}')()
    average2, sample_metric2 = process_records(records2, args2.evaluate_metric)

    if mode == 'ttest':
        statistic, p_value = stats.ttest_rel(sample_metric1, sample_metric2)
    elif mode == 'fisher':
        # Unpaired layout: columns are the two checkpoints, rows are the
        # correct / incorrect counts.
        correct1 = sample_metric1.count(True)
        correct2 = sample_metric2.count(True)
        contingency_table = [[correct1, correct2],
                             [
                                 len(sample_metric1) - correct1,
                                 len(sample_metric2) - correct2
                             ]]
        statistic, p_value = stats.fisher_exact(contingency_table)
    elif mode == 'mcnemar':
        # McNemar's test needs the *paired* table
        # [[both correct, only 1 correct], [only 2 correct, both wrong]].
        # The marginal counts used before do not contain the discordant
        # pairs the test is based on. The metric lists are assumed to be
        # aligned sample by sample, as the paired t-test branch also assumes.
        pairs = list(zip(sample_metric1, sample_metric2))
        both_correct = sum(1 for m1, m2 in pairs if m1 and m2)
        only_first = sum(1 for m1, m2 in pairs if m1 and not m2)
        only_second = sum(1 for m1, m2 in pairs if not m1 and m2)
        both_wrong = len(pairs) - both_correct - only_first - only_second
        contingency_table = [[both_correct, only_first],
                             [only_second, both_wrong]]
        b = mcnemar(contingency_table, exact=True)
        statistic, p_value = b.statistic, b.pvalue
    else:
        raise NotImplementedError

    print(
        f'[Runner] - The testing scores of the two ckpts are {average1} and {average2}, respectively.'
    )
    print(
        f'[Runner] - The statistic of the significant test of the two ckpts is {statistic}'
    )
    print(
        f'[Runner] - The P value of significant test of the two ckpts is {p_value}'
    )
def main(args):
    """Print a 2x2 agreement table and its McNemar result.

    ``args.reffile``, ``args.norm_a`` and ``args.norm_b`` are iterated in
    lockstep. Every entry is passed through ``get_norm``; index 1 marks a
    match with the reference and index 0 a mismatch (rows follow system A,
    columns follow system B).
    """
    counts = np.zeros((2, 2))
    for ref_entry, entry_a, entry_b in zip(args.reffile, args.norm_a,
                                           args.norm_b):
        reference = get_norm(ref_entry)
        a_matches = get_norm(entry_a) == reference
        b_matches = get_norm(entry_b) == reference
        counts[int(a_matches)][int(b_matches)] += 1
    print(counts)
    print(mcnemar(counts))
def print_p_values(A, B, GT, langset):
    """Print a tab-separated McNemar p-value for every language.

    For each language the ground-truth items are tallied into
    ``[[A & B, A & ~B], [~A & B, ~A & ~B]]`` according to their membership in
    ``A[lang]`` and ``B[lang]``.
    """
    print('LANG', 'PVALUE', sep='\t')
    for lang in langset:
        table = [[0, 0], [0, 0]]
        for item in GT[lang]:
            # Index 0 = item present, index 1 = item missing.
            row = int(item not in A[lang])
            col = int(item not in B[lang])
            table[row][col] += 1
        print(lang, mcnemar(table).pvalue, sep='\t')
def test_cochranq():
    """Check Cochran's Q against reference values and against McNemar.

    Reference values were produced in R:

        library(CVST)
        table1 = matrix(c(1, 0, 1, 1,
                          0, 1, 1, 1,
                          1, 1, 1, 0,
                          0, 1, 0, 0,
                          0, 1, 0, 0,
                          1, 0, 1, 0,
                          0, 1, 0, 0,
                          1, 1, 1, 1,
                          0, 1, 0, 0), ncol=4, byrow=TRUE)
        rslt1 = cochranq.test(table1)
        table2 = matrix(c(0, 0, 1, 1, 0,
                          0, 1, 0, 1, 0,
                          0, 1, 1, 0, 1,
                          1, 0, 0, 0, 1,
                          1, 1, 0, 0, 0,
                          1, 0, 1, 0, 0,
                          0, 1, 0, 0, 0,
                          0, 0, 1, 1, 0,
                          0, 0, 0, 0, 0), ncol=5, byrow=TRUE)
        rslt2 = cochranq.test(table2)
    """
    four_col = np.asarray([[1, 0, 1, 1], [0, 1, 1, 1], [1, 1, 1, 0],
                           [0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 1, 0],
                           [0, 1, 0, 0], [1, 1, 1, 1], [0, 1, 0, 0]])
    stat, pvalue, df = ctab.cochrans_q(four_col, return_object=False)
    assert_allclose(stat, 4.2)
    assert_allclose(df, 3)

    five_col = np.asarray([[0, 0, 1, 1, 0], [0, 1, 0, 1, 0], [0, 1, 1, 0, 1],
                           [1, 0, 0, 0, 1], [1, 1, 0, 0, 0], [1, 0, 1, 0, 0],
                           [0, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 0]])
    stat, pvalue, df = ctab.cochrans_q(five_col, return_object=False)
    assert_allclose(stat, 1.2174, rtol=1e-4)
    assert_allclose(df, 4)

    # Cochran's q and Mcnemar are equivalent for 2x2 tables
    first_two = five_col[:, 0:2]
    xtab = np.asarray(pd.crosstab(first_two[:, 0], first_two[:, 1]))
    q_result = ctab.cochrans_q(first_two, return_object=True)
    mcnemar_result = ctab.mcnemar(xtab, exact=False, correction=False)
    assert_allclose(q_result.statistic, mcnemar_result.statistic)
    assert_allclose(q_result.pvalue, mcnemar_result.pvalue)

    # Test for printing bunch
    assert_equal(str(q_result).startswith("df 1\npvalue 0.65"), True)
def main():
    """Train several classifiers on the spam data and compare them pairwise.

    The CSV is split 75/25, every model is fitted on the train part, and for
    every unordered pair of models a 2x2 table of per-sample correctness on
    the test part is printed with its McNemar chi-square result. Returns
    early with a message when the data file is missing.
    """
    data_path = "/home/xvpher/Intern_Project/Dataset/spamdata.csv"
    if not os.path.exists(data_path):
        print("data file not found")
        return

    df = pd.read_csv(data_path)
    # Every column except the last one is a feature; 'spam' is the label.
    col_names = df.columns[:-1]
    features = df.loc[:, col_names].values
    labels = df.loc[:, 'spam'].values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        features, labels, test_size=0.25, shuffle=True, random_state=3)

    models = [
        ('LR', LogisticRegression(solver='lbfgs', max_iter=2000, tol=0.0001)),
        ('LDA', LinearDiscriminantAnalysis()),
        ('DTC', DecisionTreeClassifier()),
        ('KNC', KNeighborsClassifier()),
        ('MNB', MultinomialNB()),
        ('RFC', RandomForestClassifier(n_estimators=100)),
        ('SVC', SVC(gamma='scale', kernel='rbf')),
    ]

    # Per-model boolean vector: True where the prediction equals the label.
    names = []
    correct = {}
    for name, model in models:
        model.fit(X_train, y_train)
        correct[name] = model.predict(X_test) == y_test
        names.append(name)

    # Pairs are derived from the model list itself, so adding or removing a
    # model no longer requires touching a hard-coded count.
    for k, first in enumerate(names):
        for second in names[k + 1:]:
            ok_1 = correct[first]
            ok_2 = correct[second]
            table = numpy.array(
                [[numpy.sum(ok_1 & ok_2), numpy.sum(ok_1 & ~ok_2)],
                 [numpy.sum(~ok_1 & ok_2), numpy.sum(~ok_1 & ~ok_2)]],
                dtype=numpy.int64)
            score = mcnemar(table, exact=False)
            print("-------({},{})--------".format(first, second))
            print(table)
            print(score)
def test_mcnemar_chisquare():
    """Check the chi-square McNemar variant against reference values."""
    observed = [
        np.array([[101, 121], [59, 33]]),
        np.array([[101, 70], [59, 33]]),
        np.array([[101, 80], [59, 33]]),
    ]

    # Reference values, e.g. from R:
    # > mcn = mcnemar.test(matrix(c(101, 121, 59, 33),nrow=2))
    expected_default = [
        [2.067222e01, 5.450095e-06],
        [0.7751938, 0.3786151],
        [2.87769784, 0.08981434],
    ]
    for f_obs, expected in zip(observed, expected_default):
        stat = mcnemar(f_obs, exact=False)
        assert_allclose([stat.statistic, stat.pvalue], expected, rtol=1e-6)

    # test correction = False
    expected_uncorrected = [
        [2.135556e01, 3.815136e-06],
        [0.9379845, 0.3327967],
        [3.17266187, 0.07488031],
    ]
    for f_obs, expected in zip(observed, expected_uncorrected):
        res = mcnemar(f_obs, exact=False, correction=False)
        assert_allclose([res.statistic, res.pvalue], expected, rtol=1e-6)
def test_mcnemar_exact():
    """Check the exact McNemar variant against reference p-values."""
    # Each case: (observed table, expected statistic, expected p-value).
    # Expected p-values come from the Vassar College online computation.
    cases = [
        (np.array([[101, 121], [59, 33]]), 59, 0.000004),
        (np.array([[101, 70], [59, 33]]), 59, 0.378688),
        (np.array([[101, 80], [59, 33]]), 59, 0.089452),
        (np.array([[101, 30], [60, 33]]), 30, 0.00206),
        (np.array([[101, 10], [30, 33]]), 10, 0.002221),
        (np.array([[101, 10], [10, 33]]), 10, 1.),
    ]
    for f_obs, expected_stat, expected_p in cases:
        stat = mcnemar(f_obs, exact=True)
        assert_almost_equal([stat.statistic, stat.pvalue],
                            [expected_stat, expected_p],
                            decimal=6)
def test_mcnemar_exact():
    """Verify statistic and p-value of the exact McNemar test on six tables."""
    tables_observed = (
        np.array([[101, 121], [59, 33]]),
        np.array([[101, 70], [59, 33]]),
        np.array([[101, 80], [59, 33]]),
        np.array([[101, 30], [60, 33]]),
        np.array([[101, 10], [30, 33]]),
        np.array([[101, 10], [10, 33]]),
    )
    # vassar college online computation
    reference_pvalues = (0.000004, 0.378688, 0.089452, 0.00206, 0.002221, 1.)
    reference_statistics = (59, 59, 59, 30, 10, 10)

    checks = zip(tables_observed, reference_statistics, reference_pvalues)
    for observed, ref_stat, ref_p in checks:
        outcome = mcnemar(observed, exact=True)
        assert_almost_equal([outcome.statistic, outcome.pvalue],
                            [ref_stat, ref_p],
                            decimal=6)
def test_mcnemar_chisquare():
    """Verify the chi-square McNemar test, first with default arguments and
    then with ``correction=False``."""
    tables_observed = (
        np.array([[101, 121], [59, 33]]),
        np.array([[101, 70], [59, 33]]),
        np.array([[101, 80], [59, 33]]),
    )

    def _check(expected_rows, **options):
        # Compare [statistic, pvalue] of every table with its reference row.
        for observed, expected in zip(tables_observed, expected_rows):
            outcome = mcnemar(observed, exact=False, **options)
            assert_allclose([outcome.statistic, outcome.pvalue],
                            expected,
                            rtol=1e-6)

    #> mcn = mcnemar.test(matrix(c(101, 121, 59, 33),nrow=2))
    _check([[2.067222e01, 5.450095e-06],
            [0.7751938, 0.3786151],
            [2.87769784, 0.08981434]])

    # test correction = False
    _check([[2.135556e01, 3.815136e-06],
            [0.9379845, 0.3327967],
            [3.17266187, 0.07488031]],
           correction=False)
def do_mcnemar(fs1, fs2):
    """Run an exact McNemar test on two pickled gold-versus-prediction frames.

    The frames are read from
    ``./output/SVM_gold_versus_pred_30d_<fs>.pickle``. A row is correct when
    its ``gold_<fs>`` value equals its ``pred_<fs>`` value. Rows are paired by
    position into the table::

                     | c2_correct | c2_incorrect
        c1_correct   |            |
        c1_incorrect |            |

    Args:
        fs1, fs2: feature-set names used in the file and column names.

    Returns:
        ``(fs1, fs2, alpha, pvalue)``.
    """
    path = './output/'
    pin1 = os.path.join(path, 'SVM_gold_versus_pred_30d_' + fs1 + '.pickle')
    pin2 = os.path.join(path, 'SVM_gold_versus_pred_30d_' + fs2 + '.pickle')

    # NOTE(review): pickle.load executes code embedded in the file; only use
    # this on pickles produced by a trusted pipeline.
    with open(pin1, 'rb') as handle1:
        df1 = pickle.load(handle1)
    with open(pin2, 'rb') as handle2:
        df2 = pickle.load(handle2)

    correct1 = (df1['gold_' + fs1] == df1['pred_' + fs1]).to_numpy(dtype=bool)
    # Rows are paired by position over the length of the first frame.
    correct2 = (df2['gold_' + fs2] == df2['pred_' + fs2]).to_numpy(
        dtype=bool)[:len(correct1)]

    table = np.array(
        [[np.sum(correct1 & correct2), np.sum(correct1 & ~correct2)],
         [np.sum(~correct1 & correct2), np.sum(~correct1 & ~correct2)]],
        dtype=float)

    result = mcnemar(table, exact=True)
    # summarize the finding
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0) for', fs1, fs2)
    else:
        print('Different proportions of errors (reject H0) for', fs1, fs2)
    return fs1, fs2, alpha, result.pvalue
def run_mcnemar(baseline_pred, experiment_pred, y_test):
    """Run McNemar's significance test on two paired sets of predictions.

    McNemar's test evaluates paired nominal data through a 2x2 contingency
    table (background: http://www.atyun.com/25532.html)::

                          | experiment correct | experiment incorrect
        baseline correct  |         a          |          b
        baseline incorrect|         c          |          d

    H0: both systems err in the same proportion, i.e. the imbalance between
    ``b`` and ``c`` is only random noise. The statistic is referred to a
    chi-square distribution; no continuity correction is applied.

    Returns:
        The result object produced by ``mcnemar``. The verdict at
        ``alpha = 0.05`` is printed.
    """
    tallies = {'a': 0, 'b': 0, 'c': 0, 'd': 0}
    for b_pred, ex_pred, true in zip(baseline_pred, experiment_pred, y_test):
        baseline_ok = b_pred == true
        experiment_ok = ex_pred == true
        if baseline_ok:
            cell = 'a' if experiment_ok else 'b'
        else:
            cell = 'c' if experiment_ok else 'd'
        tallies[cell] += 1
    table = [[tallies['a'], tallies['b']], [tallies['c'], tallies['d']]]

    # calculate mcnemar test
    result = mcnemar(table, exact=False, correction=False)

    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
    else:
        print('Different proportions of errors (reject H0)')
    return result
def perform_mcnemar_test(qid_to_agreements1, qid_to_agreements2):
    """Build a 2x2 table from paired agreement scores and test it.

    For every question id in the first mapping, the agreement values of the
    two mappings are paired in order. A pair counts as cell (0, 0) when both
    values are positive, (0, 1) when the first is larger, (1, 0) when the
    second is larger and (1, 1) otherwise.

    Returns:
        ``(table, pvalue)`` from the chi-square test with continuity
        correction.
    """
    table = np.zeros((2, 2))
    for qid in list(qid_to_agreements1.keys()):
        paired = zip(qid_to_agreements1[qid], qid_to_agreements2[qid])
        for first, second in paired:
            if first > 0 and second > 0:
                cell = (0, 0)
            elif first > second:
                cell = (0, 1)
            elif second > first:
                cell = (1, 0)
            else:
                cell = (1, 1)
            table[cell] += 1
    outcome = mcnemar(table, exact=False, correction=True)
    return table, outcome.pvalue
def mcnemar(self, Features=None, Clstrs=None):
    """
    Print, per feature and per pair of clusters, whether a McNemar test is
    significant at the 0.05 level.

    Parameters
    ----------
    Features: iterable of column names, optional
        Features to test. Defaults to every column of the stored data except
        the last one.
    Clstrs: list of cluster labels, optional
        Clusters to compare. Defaults to the sorted distinct non-null values
        of the "Clusters" column.

    Returns
    ----------
    None. One line per (feature, cluster pair) is printed; the statistic and
    p-value are not returned.

    Notes
    ----------
    NOTE(review): the nesting of the cluster defaulting below was
    reconstructed from whitespace-collapsed source; confirm that the
    dropna/unique/sort steps belong inside the `Clstrs is None` branch.
    """
    if Features is None:
        Features = self.__data.columns[:-1].copy()
    if Clstrs is None:
        Clstrs = self.__data["Clusters"].copy()
        Clstrs = Clstrs.dropna().unique().tolist()
        Clstrs.sort()
    for feature in Features:
        print("\n\n", feature,"\n")
        # Keep only rows where both the feature and the cluster are present.
        sub = self.__data[[feature, "Clusters"]].copy()
        sub = sub.dropna()
        # One list of raw feature values per cluster, in Clstrs order.
        sub_df = []
        for cluster in Clstrs:
            sub_df.append(sub[sub["Clusters"] == cluster][feature].values.tolist())
        # Every unordered pair of clusters.
        for i, data_i in enumerate(sub_df):
            for j in range(i+1, len(sub_df)):
                # NOTE(review): this passes two lists of raw feature values,
                # not a 2x2 table of paired counts - confirm this is the
                # input the mcnemar function is meant to receive.
                table = [sub_df[i], sub_df[j]]
                res = contingency_tables.mcnemar(table)
                if res.pvalue < 0.05:
                    print("The feature", feature, "is significant for clusters", Clstrs[i], "and", Clstrs[j])
                else:
                    print("The feature", feature, "is not significant for clusters", Clstrs[i], "and", Clstrs[j])
def reject_null(misclf, total_examples, clf_name1, clf_name2):
    """Print and return the exact McNemar p-value for two classifiers.

    Args:
        misclf: mapping from classifier name to its misclassified examples.
        total_examples: total number of evaluated examples.
        clf_name1, clf_name2: names of the classifiers to compare.

    Returns:
        The p-value of the test.
    """
    errors_1 = misclf[clf_name1]
    errors_2 = misclf[clf_name2]

    both_wrong = intersection_size(errors_1, errors_2)
    only_1_wrong = len(errors_1) - both_wrong
    only_2_wrong = len(errors_2) - both_wrong
    none_wrong = total_examples - both_wrong - only_1_wrong - only_2_wrong

    table = [[both_wrong, only_1_wrong], [only_2_wrong, none_wrong]]
    result = mcnemar(table, exact=True)
    print(clf_name1 + " v " + clf_name2 + " " + str(result.pvalue))
    return result.pvalue
def run_mcnemar_test(report_df: pd.DataFrame) -> Tuple[float, float]:
    """Run McNemar's test on the ``isCorrect_0`` / ``isCorrect_1`` columns.

    The table is ``((both, only 0), (only 1, neither))`` in terms of which
    column marks a row as correct.

    Returns:
        ``(statistic, pvalue)``.
    """
    ok_0 = report_df.loc[:, "isCorrect_0"]
    ok_1 = report_df.loc[:, "isCorrect_1"]

    both = (ok_0 & ok_1).sum()
    only_0 = (ok_0 & ~ok_1).sum()
    only_1 = (~ok_0 & ok_1).sum()
    neither = (~ok_0 & ~ok_1).sum()

    outcome = mcnemar(((both, only_0), (only_1, neither)))
    return outcome.statistic, outcome.pvalue
def mc_nemar(y_rbf, y_linear):
    """Run an exact McNemar test on the cross-tabulation of two label sets.

    Returns:
        ``0`` when the p-value is at least 0.05, ``1`` otherwise.
    """
    rbf_series = pd.Series(y_rbf)
    linear_series = pd.Series(y_linear)
    # The mcnemar function also accepts an array-like here.
    table = pd.crosstab(rbf_series,
                        linear_series,
                        rownames=["y_rbf"],
                        colnames=["y_linear"],
                        dropna=False)

    result = mcnemar(table, exact=True)
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))

    alpha = 0.05
    return 0 if result.pvalue >= alpha else 1
def mcnemar_quotes(self):
    """
    Run McNemar test on quotes.

    Baseline and experimental speaker predictions are each compared with
    the gold speaker via ``utils.characters_match`` and tallied into
    ``[[both correct, baseline only], [experiment only, both incorrect]]``.
    The result is printed, judged at ``alpha = 0.05`` and returned.
    """
    quotes = self.ordered_predictions['quotes']
    table = [[0, 0], [0, 0]]
    for baseline_quote, experimental_quote, gold_quote in zip(
            quotes['baseline'], quotes['experimental'], quotes['gold']):
        baseline_ok = utils.characters_match(baseline_quote.speaker,
                                             gold_quote.speaker)
        experimental_ok = utils.characters_match(experimental_quote.speaker,
                                                 gold_quote.speaker)
        # Index 0 = correct, index 1 = incorrect.
        row = 0 if baseline_ok else 1
        col = 0 if experimental_ok else 1
        table[row][col] += 1

    # calculate mcnemar test
    result = mcnemar(table, correction=False)
    # summarize the finding
    print('statistic=%.3f, p-value=%.6f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
    else:
        print('Different proportions of errors (reject H0)')
    return result
def test_cochranq():
    """Check Cochran's Q against R reference values and against McNemar."""
    # Reference values were produced in R:
    # library(CVST)
    # table1 = matrix(c(1, 0, 1, 1,
    #                   0, 1, 1, 1,
    #                   1, 1, 1, 0,
    #                   0, 1, 0, 0,
    #                   0, 1, 0, 0,
    #                   1, 0, 1, 0,
    #                   0, 1, 0, 0,
    #                   1, 1, 1, 1,
    #                   0, 1, 0, 0), ncol=4, byrow=TRUE)
    # rslt1 = cochranq.test(table1)
    # table2 = matrix(c(0, 0, 1, 1, 0,
    #                   0, 1, 0, 1, 0,
    #                   0, 1, 1, 0, 1,
    #                   1, 0, 0, 0, 1,
    #                   1, 1, 0, 0, 0,
    #                   1, 0, 1, 0, 0,
    #                   0, 1, 0, 0, 0,
    #                   0, 0, 1, 1, 0,
    #                   0, 0, 0, 0, 0), ncol=5, byrow=TRUE)
    # rslt2 = cochranq.test(table2)
    wide4 = np.asarray([[1, 0, 1, 1], [0, 1, 1, 1], [1, 1, 1, 0],
                        [0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 1, 0],
                        [0, 1, 0, 0], [1, 1, 1, 1], [0, 1, 0, 0]])
    stat, pvalue, df = ctab.cochrans_q(wide4, return_object=False)
    assert_allclose(stat, 4.2)
    assert_allclose(df, 3)

    wide5 = np.asarray([[0, 0, 1, 1, 0], [0, 1, 0, 1, 0], [0, 1, 1, 0, 1],
                        [1, 0, 0, 0, 1], [1, 1, 0, 0, 0], [1, 0, 1, 0, 0],
                        [0, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 0]])
    stat, pvalue, df = ctab.cochrans_q(wide5, return_object=False)
    assert_allclose(stat, 1.2174, rtol=1e-4)
    assert_allclose(df, 4)

    # Cochran's q and Mcnemar are equivalent for 2x2 tables
    pair = wide5[:, 0:2]
    xtab = np.asarray(pd.crosstab(pair[:, 0], pair[:, 1]))
    from_q = ctab.cochrans_q(pair, return_object=True)
    from_mcnemar = ctab.mcnemar(xtab, exact=False, correction=False)
    assert_allclose(from_q.statistic, from_mcnemar.statistic)
    assert_allclose(from_q.pvalue, from_mcnemar.pvalue)

    # Test for printing bunch
    assert_equal(str(from_q).startswith("df 1\npvalue 0.65"), True)
def test_cochranq():
    """Check Cochran's Q against reference values and against McNemar.

    Reference values were produced in R:

        library(CVST)
        table1 = matrix(c(1, 0, 1, 1,
                          0, 1, 1, 1,
                          1, 1, 1, 0,
                          0, 1, 0, 0,
                          0, 1, 0, 0,
                          1, 0, 1, 0,
                          0, 1, 0, 0,
                          1, 1, 1, 1,
                          0, 1, 0, 0), ncol=4, byrow=TRUE)
        rslt1 = cochranq.test(table1)
        table2 = matrix(c(0, 0, 1, 1, 0,
                          0, 1, 0, 1, 0,
                          0, 1, 1, 0, 1,
                          1, 0, 0, 0, 1,
                          1, 1, 0, 0, 0,
                          1, 0, 1, 0, 0,
                          0, 1, 0, 0, 0,
                          0, 0, 1, 1, 0,
                          0, 0, 0, 0, 0), ncol=5, byrow=TRUE)
        rslt2 = cochranq.test(table2)
    """
    responses_4 = np.asarray([
        [1, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 0],
        [0, 1, 0, 0],
        [0, 1, 0, 0],
        [1, 0, 1, 0],
        [0, 1, 0, 0],
        [1, 1, 1, 1],
        [0, 1, 0, 0],
    ])
    stat, pvalue, df = ctab.cochrans_q(responses_4, return_object=False)
    assert_allclose(stat, 4.2)
    assert_allclose(df, 3)

    responses_5 = np.asarray([
        [0, 0, 1, 1, 0],
        [0, 1, 0, 1, 0],
        [0, 1, 1, 0, 1],
        [1, 0, 0, 0, 1],
        [1, 1, 0, 0, 0],
        [1, 0, 1, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 0, 0, 0],
    ])
    stat, pvalue, df = ctab.cochrans_q(responses_5, return_object=False)
    assert_allclose(stat, 1.2174, rtol=1e-4)
    assert_allclose(df, 4)

    # Cochran's q and Mcnemar are equivalent for 2x2 tables
    two_columns = responses_5[:, 0:2]
    xtab = np.asarray(pd.crosstab(two_columns[:, 0], two_columns[:, 1]))
    q_outcome = ctab.cochrans_q(two_columns, return_object=True)
    mcnemar_outcome = ctab.mcnemar(xtab, exact=False, correction=False)
    assert_allclose(q_outcome.statistic, mcnemar_outcome.statistic)
    assert_allclose(q_outcome.pvalue, mcnemar_outcome.pvalue)