Example #1
def mcnemar_test(p1, p2):
    con_table = [[0, 0], [0, 0]]
    for i, p1_val in enumerate(p1):
        p2_val = p2[i]
        if p1_val == True and p2_val == True:
            con_table[0][0] += 1
        elif p1_val == True and p2_val == False:
            con_table[0][1] += 1
        elif p1_val == False and p2_val == True:
            con_table[1][0] += 1
        elif p1_val == False and p2_val == False:
            con_table[1][1] += 1
    print("Contingency table: {}".format(con_table))

    # test statistic must be calculated using binomial distribution if any of the table values are less than 25
    if any(val < 25 for entry in con_table for val in entry):
        print("Some value < 25. Calculating exact p-value")
        result = sm.mcnemar(con_table, exact=True)
    else:
        print("All values >= 25. Calculating standard McNemar's statistic")
        result = sm.mcnemar(con_table, exact=False, correction=True)

    print('statistic=%.3f, p-value=%.5f' % (result.statistic, result.pvalue))
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
        return result.statistic, result.pvalue, 'fail'
    else:
        print('Different proportions of errors (reject H0)')
        return result.statistic, result.pvalue, 'reject'
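A minimal usage sketch for the helper above (hedged: the example never shows its imports, so `sm` is assumed to be `statsmodels.stats.contingency_tables`, and the boolean lists are made-up per-sample correctness flags for two models):

from statsmodels.stats import contingency_tables as sm  # assumed alias used by mcnemar_test above

model_a_correct = [True, True, False, True, False, True, True, True, False, True]
model_b_correct = [True, False, False, True, True, True, True, False, False, True]
stat, pval, decision = mcnemar_test(model_a_correct, model_b_correct)
print(stat, pval, decision)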
Example #2
def mcnemarTest(modelA, modelB, testData, testLabels, confidence=0.95):
    # not complete - need to understand the output of mcnemar function...
    from statsmodels.stats.contingency_tables import mcnemar
    from sklearn.metrics import confusion_matrix
    import numpy as np

    # get test results
    predictA = np.argmax(modelA.predict(testData), axis=1)
    predictB = np.argmax(modelB.predict(testData), axis=1)
    actual = np.argmax(testLabels, axis=1)
    resultsA = predictA == actual
    resultsB = predictB == actual

    # build confusion matrix
    confMatrix = confusion_matrix(resultsA, resultsB)

    # check if all values in confusion matrix > 25 (only interested in Yes/No and No/Yes fields)
    if (confMatrix[0, 1] > 25) and (confMatrix[1, 0] > 25):
        standardTest = True
    else:
        standardTest = False

    # do McNemar test
    if standardTest == True:
        stats = mcnemar(confMatrix, exact=False, correction=True)
    else:
        stats = mcnemar(confMatrix, exact=True)

    pval = stats.pvalue
    print "### McNemar Test ###"
    print confMatrix
    print "Pvalue: {}".format(pval)
    if pval < (1 - confidence):
        print "Significant difference (reject null hypothesis)"
    else:
        print "No significant difference (accept null hypothesis)"
def test_mannwhithney(predfile1, predfile2, testfile, testfile2):
    y_true1, y_pred1, y_true_prec1, y_pred_prec1 = evaluate(
        testfile, predfile1)
    y_true2, y_pred2, y_true_prec2, y_pred_prec2 = evaluate(
        testfile2, predfile2)
    print('\n First model: ', predfile1)
    print('Ex: ', y_pred1[:10], ' Len: ', len(y_pred1))
    print('Second model: ', predfile2)
    print('Ex: ', y_pred2[:10], ' Len: ', len(y_pred2))
    mismatches = int(np.sum(np.array(y_true1) != np.array(y_true2)))
    print('Is testset the same? ', mismatches == 0,
          '({} mismatching gold labels)'.format(mismatches))

    mc_tb = mcnemar_table(y_target=np.array(y_true1),
                          y_model1=np.array(y_pred1),
                          y_model2=np.array(y_pred2))
    print('Contingency table: ', mc_tb)
    mcnemar_res = mcnemar(mc_tb)
    print('McNemar:  p value: {:.20f}'.format(mcnemar_res.pvalue))
    chi2, p = mlx_mcnemar(ary=mc_tb, corrected=True)
    print('McNemar: chi:{:.4f}  p value: {}'.format(chi2, p))
    mc_tb_prec = mcnemar_table(y_target=np.array(y_true_prec1),
                               y_model1=np.array(y_pred_prec1),
                               y_model2=np.array(y_pred_prec2))
    mcnemar_res_prec = mcnemar(mc_tb_prec)
    print('McNemar PRECISION:  p value: {}'.format(mcnemar_res_prec.pvalue))
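The function above mixes mlxtend's `mcnemar_table`/`mcnemar` (aliased `mlx_mcnemar`) with the statsmodels `mcnemar`. A self-contained sketch of that combination, with made-up labels and assuming mlxtend is installed:

import numpy as np
from mlxtend.evaluate import mcnemar_table, mcnemar as mlx_mcnemar
from statsmodels.stats.contingency_tables import mcnemar

y_true = np.array([1, 1, 0, 0, 1, 0, 1, 1])
y_pred_a = np.array([1, 0, 0, 0, 1, 1, 1, 1])
y_pred_b = np.array([1, 1, 0, 1, 0, 1, 1, 1])

tb = mcnemar_table(y_target=y_true, y_model1=y_pred_a, y_model2=y_pred_b)
print(tb)                                  # 2x2 table of paired correct/incorrect counts
print(mcnemar(tb).pvalue)                  # statsmodels, exact binomial by default
chi2, p = mlx_mcnemar(ary=tb, corrected=True)
print(chi2, p)                             # mlxtend chi-square with continuity correction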
Example #4
def Comparing_classifiers(this: ClassificationReport,
                          that: ClassificationReport):
    this_that_conf_mat = confusion_matrix(this.y_test_pred, that.y_test_pred)

    print()
    print(120 * '-')
    print(f'Comparing {this.classifier} with {that.classifier}')
    print(60 * '-', 'Confusion matrix', 60 * '-')
    print(this_that_conf_mat)
    # print(this_that_conf_mat.shape)
    if this_that_conf_mat.shape[0] > 1:
        print(
            f'Null Hypothesis H0 = Both {this.classifier} and {that.classifier} have predictions that are similar and make errors in much the same proportion'
        )
        result = mcnemar(this_that_conf_mat, exact=False)
        print('statistic=%.3f, p-value=%.3f' %
              (result.statistic, result.pvalue))
        alpha = 0.01
        if result.pvalue > alpha:
            print(
                f'Since P Value is greater than {alpha}, thus Same proportions of errors (fail to reject H0)'
            )
        else:
            print(
                f'Since P Value is less than {alpha}, thus Different proportions of errors (reject H0)'
            )
    else:
        print('Confusion matrix cannot be created as they produce same result')
        print('Same proportions of errors (fail to reject H0)')
    print(120 * '-')
Example #5
def mcnemar_test(in1, in2):
    yes_yes = 0
    yes_no = 0
    no_yes = 0
    no_no = 0

    # Get individual predictions, mark as correct or not
    results1 = contingency_stats(in1)
    results2 = contingency_stats(in2)
    for key in results1.keys():
        res1 = results1[key]
        res2 = results2[key]
        if res1 + res2 == 2:
            yes_yes += 1
        elif res1 + res2 == 0:
            no_no += 1
        elif res1 == 1 and res2 == 0:
            yes_no += 1
        elif res1 == 0 and res2 == 1:
            no_yes += 1

    # Construct contingency table
    table = [[yes_yes, yes_no], [no_yes, no_no]]
    result = mcnemar(table, exact=False, correction=True)
    return result.pvalue
Example #6
def p_value_mcNemar(y_test, y_pred1, y_pred2):
    #     contingency table
    ct = np.zeros((2, 2))
    for k, y in enumerate(y_test):
        if y == y_pred1[k] and y == y_pred2[k]:
            ct[0, 0] += 1
        elif y != y_pred1[k] and y == y_pred2[k]:
            ct[1, 0] += 1
        elif y != y_pred1[k] and y != y_pred2[k]:
            ct[1, 1] += 1
        elif y == y_pred1[k] and y != y_pred2[k]:
            ct[0, 1] += 1

    print(ct)
    pd_ct = pd.DataFrame(ct,
                         columns=['C2 correct', 'C2 incorrect'],
                         index=['C1 correct', 'C1 incorrect'])
    plt.figure(figsize=(14, 7))
    plt.title("Contingency table")
    sn.set(font_scale=3.0)  # Adjust to fit
    sn.heatmap(pd_ct, annot=True, fmt='g')
    plt.show()
    print(pd_ct)
    result = mcnemar(table=ct, exact=False, correction=True)
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
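As a cross-check on what `mcnemar(..., exact=False, correction=True)` reports in the example above, the continuity-corrected statistic can be recomputed by hand from the discordant cells; a sketch with a hypothetical table:

import numpy as np
from scipy.stats import chi2
from statsmodels.stats.contingency_tables import mcnemar

ct = np.array([[30, 12], [5, 20]])          # hypothetical paired counts
b, c = ct[0, 1], ct[1, 0]
stat_by_hand = (abs(b - c) - 1) ** 2 / (b + c)
p_by_hand = chi2.sf(stat_by_hand, df=1)

result = mcnemar(ct, exact=False, correction=True)
print(stat_by_hand, result.statistic)       # should agree
print(p_by_hand, result.pvalue)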
def run_mcnemar(baseline_pred, experiment_pred, y_test):
    # McNemar's Test (Significance)
    a = 0  # Both correct
    b = 0  # Baseline correct, experiment incorrect
    c = 0  # Baseline incorrect, experiment correct
    d = 0  # Both incorrect
    for b_pred, ex_pred, true in zip(baseline_pred, experiment_pred, y_test):
        if b_pred == true and ex_pred == true:
            a += 1
        elif b_pred == true and ex_pred != true:
            b += 1
        elif b_pred != true and ex_pred == true:
            c += 1
        else:
            d += 1

    table = [[a, b], [c, d]]

    # calculate McNemar's test
    result = mcnemar(table, exact=False, correction=False)
    # summarize the finding
    #print('statistic=%.3f, p-value=%.6f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
    else:
        print('Different proportions of errors (reject H0)')

    return result
def core(tsx, tsy):
    '''
    input
    --------
      tsx: categorical data
      tsy: categorical data
    '''

    crosstab = pd.crosstab(tsx, tsy)
    crosstab2 = pd.crosstab(tsx, tsy, margins=True)
    crosstab2 = crosstab2.rename(columns={'All': 'Total'}, index={'All': 'Total'})

    if crosstab.shape == (2, 2):
        res = contingency_tables.mcnemar(crosstab)
        method = 'mcnemar'

    else:
        res = contingency_tables.SquareTable(crosstab).symmetry(
            method="bowker")
        method = 'bowker'

    chi2 = res.statistic
    p = res.pvalue
    expected = stats.contingency.expected_freq(crosstab)

    dfe = pd.DataFrame(expected, columns=tsy.unique(),
                       index=tsx.unique()).round(3)
    dfte = crosstab.astype(str) + ' (' + dfe.astype(str) + ')'
    dfte['Total'] = crosstab2['Total']
    dfte.loc['Total'] = crosstab2.loc['Total']
    dfte['Test method'] = method
    dfte['Chi-square statistic'] = chi2
    dfte['p-value'] = p
    dfte.index.name = 'Category'
    return dfte.reset_index().set_index(['Test method', 'Chi-square statistic', 'p-value', 'Category'])
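A usage sketch for `core`, assuming the surrounding module imports pandas as pd, scipy.stats as stats, and statsmodels' contingency_tables (they are not shown here); the two-level Series below exercise the 2x2 McNemar branch, while Series with more levels would fall through to the Bowker symmetry test:

import pandas as pd

tsx = pd.Series(['yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no'], name='before')
tsy = pd.Series(['yes', 'yes', 'yes', 'no', 'no', 'no', 'no', 'yes'], name='after')
print(core(tsx, tsy))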
Example #9
def compute_power(prob_table, dataset_size, alpha=0.05, r=5000):
    """
    Dallas Card et al. "With Little Power Comes Great Responsibility"
    https://colab.research.google.com/drive/1anaS-9ElouZhUgCAYQt8jy8qBiaXnnK1?usp=sharing#scrollTo=OCz-VAm_ifqZ
    """
    if prob_table[0, 1] == prob_table[1, 0]:
        raise RuntimeError("Power is undefined when the true effect is zero.")

    pvals = []
    diffs = []
    for i in trange(r):  # number of simulations
        sample = np.random.multinomial(n=dataset_size,
                                       pvals=prob_table.reshape(
                                           (4, ))).reshape((2, 2))
        acc_diff = (sample[0, 1] - sample[1, 0]) / dataset_size
        test_results = mcnemar(sample)
        pvals.append(test_results.pvalue)
        diffs.append(acc_diff)

    true_diff = prob_table[0, 1] - prob_table[1, 0]
    true_sign = np.sign(true_diff)
    sig_diffs = [d for i, d in enumerate(diffs) if pvals[i] <= alpha]
    power = (len([
        d for i, d in enumerate(diffs)
        if pvals[i] <= alpha and np.sign(d) == true_sign
    ]) / r)
    mean_effect = np.mean(diffs)
    type_m = np.mean(np.abs(sig_diffs) / np.abs(true_diff))
    type_s = np.mean(np.sign(sig_diffs) != true_sign)
    return power, mean_effect, type_m, type_s
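A usage sketch for `compute_power`, assuming numpy, tqdm's `trange`, and the statsmodels `mcnemar` are imported as in the original module; the probability table is hypothetical (cells sum to 1, and the off-diagonal difference is the assumed true effect):

import numpy as np

prob_table = np.array([[0.60, 0.08],
                       [0.05, 0.27]])
power, mean_effect, type_m, type_s = compute_power(prob_table, dataset_size=500, r=2000)
print(power, mean_effect, type_m, type_s)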
Example #10
def binomial_paired_mcnemartest(data_bin_1, data_bin_2):
    alpha = 0.05 / 4
    # from https://machinelearningmastery.com/mcnemars-test-for-machine-learning/

    # build up contingency table assuming data is ordered by the test_idx
    success_1_success_2 = np.count_nonzero(
        np.logical_and(data_bin_1, data_bin_2))
    failed_1_failed_2 = np.count_nonzero(
        np.logical_and(np.logical_not(data_bin_1), np.logical_not(data_bin_2)))
    success_1_failed_2 = np.count_nonzero(
        np.logical_and(data_bin_1, np.logical_not(data_bin_2)))
    failed_1_success_2 = np.count_nonzero(
        np.logical_and(np.logical_not(data_bin_1), data_bin_2))

    contingency_table = [[success_1_success_2, success_1_failed_2],
                         [failed_1_success_2, failed_1_failed_2]]

    # bail out when there are no discordant pairs: mcnemar would warn and the test is meaningless
    if (success_1_failed_2 + failed_1_success_2) == 0:
        return False, 1

    # calculate mcnemar test
    result = mcnemar(contingency_table, exact=False)
    if result.pvalue <= alpha:
        return True, result.pvalue
    else:
        return False, result.pvalue
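A usage sketch for `binomial_paired_mcnemartest`, with hypothetical boolean success flags aligned by test index; note the hard-coded alpha = 0.05 / 4, which looks like a Bonferroni-style correction for four comparisons in the original code:

import numpy as np

data_bin_1 = np.array([1, 1, 0, 1, 0, 1, 1, 0, 1, 1], dtype=bool)
data_bin_2 = np.array([1, 0, 0, 1, 1, 1, 0, 0, 1, 0], dtype=bool)
is_significant, pvalue = binomial_paired_mcnemartest(data_bin_1, data_bin_2)
print(is_significant, pvalue)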
Example #11
def main():
    parser = ArgumentParser()
    parser.add_argument("-s", "--seed", dest="seed", metavar="INT", type=int, default=None,
                        help="random seed")
    parser.add_argument("--random", dest="random", action="store_true", default=False)
    parser.add_argument("--freq", dest="most_frequent", action="store_true", default=False)
    parser.add_argument("--cvn", metavar="INT", type=int, default=10)
    parser.add_argument("langs", metavar="LANGS", default=None)
    parser.add_argument("f1", metavar="LANGS2 PREFIX", default=None)
    parser.add_argument("f2", metavar="LANGS2 PREFIX", default=None)
    args = parser.parse_args()

    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)

    langs = list(load_json_stream(open(args.langs)))
    mat = np.zeros((2, 2), dtype=np.int32)
    for cvi in range(args.cvn):
        fp1 = args.f1.format(cvi)
        fp2 = args.f2.format(cvi)
        sys.stderr.write("processsing {} and {}\n".format(fp1, fp2))
        filled_langs1 = list(load_json_stream(open(fp1)))
        filled_langs2 = list(load_json_stream(open(fp2)))
        mat += eval_mv(filled_langs1, filled_langs2, langs)
    print(mat)
    bunch = mcnemar(mat, exact=False)
    print("mcnemar\t{}".format(bunch))
Example #12
def mcnemar_test(data_df, var1, var2):
    """Test of difference between two paired binary variables."""
    data_copy_df = data_df.copy()

    # First we want to compute the contingency table
    values_list = data_copy_df[var1].value_counts().index.tolist()

    indexes = pd.MultiIndex.from_product([[var1], values_list])
    columns = pd.MultiIndex.from_product([[var2], values_list])

    contingency_table_df = pd.DataFrame(columns=columns, index=indexes)

    for value1 in values_list:
        for value2 in values_list:
            contingency_table_df.loc[(var1, value1), (var2, value2)] = len(
                data_copy_df.loc[(data_copy_df[var1] == value1)
                                 & (data_copy_df[var2] == value2)])

    display(contingency_table_df)

    # Then we use the McNemar test on it with the assumption that it's the same distribution
    mcnemar_results = mcnemar(contingency_table_df.values)
    mcnemar_results_df = pd.DataFrame(columns=['statistic', 'p_value'])

    mcnemar_results_df.loc[var1 + ' vs ' + var2,
                           'statistic'] = mcnemar_results.statistic
    mcnemar_results_df.loc[var1 + ' vs ' + var2,
                           'p_value'] = mcnemar_results.pvalue
    display(mcnemar_results_df)
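A usage sketch for this DataFrame-based variant, with a made-up paired binary variable; `display` and the statsmodels `mcnemar` are assumed to come from the original notebook environment:

import pandas as pd

df = pd.DataFrame({
    'before': ['yes', 'yes', 'no', 'no', 'yes', 'no', 'yes', 'no'],
    'after':  ['yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes'],
})
mcnemar_test(df, 'before', 'after')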
Example #13
def make_stats(a1, b1, gd1, n, tag):
    # start from empty arrays; np.array(0) would prepend a spurious 0 element to the counts
    al_train = np.array([])
    bl_train = np.array([])

    al_out = np.array([])
    bl_out = np.array([])

    for key, value in gd1.items():
        a_out = a1[key][n:]
        b_out = b1[key][n:]
        gd_out = gd1[key][n:]

        a_train = a1[key][:n]
        b_train = b1[key][:n]
        gd_train = gd1[key][:n]

        at_out = np.zeros(len(a_out))
        bt_out = np.zeros(len(b_out))

        at_train = np.zeros(len(a_train))
        bt_train = np.zeros(len(b_train))

        at_out[a_out == gd_out] = 1
        bt_out[b_out == gd_out] = 1

        at_train[a_train == gd_train] = 1
        bt_train[b_train == gd_train] = 1

        al_out = np.append(al_out, at_out)
        bl_out = np.append(bl_out, bt_out)

        al_train = np.append(al_train, at_train)
        bl_train = np.append(bl_train, bt_train)
        
    c_out = mcnemar(confusion_matrix(al_out, bl_out, labels=(0,1)))
    result_mc_out = {'tag': tag, 'set': 'out-of-sample', 'sample-count': len(al_out),'type': 'McNemar', 'pvalue': c_out.pvalue, 'statistic': c_out.statistic }

    c_train = mcnemar(confusion_matrix(al_train, bl_train, labels=(0,1)))
    result_mc_train = {'tag': tag, 'set': 'train', 'sample-count': len(al_train), 'type': 'McNemar', 'pvalue': c_train.pvalue, 'statistic': c_train.statistic }

    stats_out, pvalue_out = ttest_rel(al_out, bl_out, alternative='greater')
    result_st_out = {'tag': tag, 'set': 'out-of-sample', 'sample-count': len(al_out), 'type': 'Ttest', 'pvalue': pvalue_out, 'statistic': stats_out }

    stats_train, pvalue_train = ttest_rel(al_train, bl_train, alternative='greater')
    result_st_train = {'tag': tag, 'set': 'train', 'sample-count': len(al_train), 'type': 'Ttest', 'pvalue': pvalue_train, 'statistic': stats_train }
    
    return result_st_train, result_st_out, result_mc_train, result_mc_out
Example #14
def test_mcnemar():

    # Use chi^2 without continuity correction
    b1 = ctab.mcnemar(tables[0], exact=False, correction=False)

    st = sm.stats.SquareTable(tables[0])
    b2 = st.homogeneity()
    assert_allclose(b1.statistic, b2.statistic)
    assert_equal(b2.df, 1)

    # Use chi^2 with continuity correction
    b3 = ctab.mcnemar(tables[0], exact=False, correction=True)
    assert_allclose(b3.pvalue, r_results.loc[0, "homog_cont_p"])

    # Use binomial reference distribution
    b4 = ctab.mcnemar(tables[0], exact=True)
    assert_allclose(b4.pvalue, r_results.loc[0, "homog_binom_p"])
def calculate_mcnemars_test(hyperpartisan_valid_predictions,
                            joint_valid_predictions):
    contingency_table = create_contingency_table(
        hyperpartisan_valid_targets, hyperpartisan_valid_predictions,
        joint_valid_predictions)

    result = mcnemar(contingency_table, exact=True)
    return result.pvalue
Example #17
def main():
    torch.multiprocessing.set_sharing_strategy('file_system')
    torchaudio.set_audio_backend('sox_io')
    hack_isinstance()

    # get config and arguments
    mode, args1, config1, args2, config2 = get_ttest_args()

    # Fix seed and make backends deterministic
    random.seed(args1.seed)
    np.random.seed(args1.seed)
    torch.manual_seed(args1.seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(args1.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    tester1 = Tester(args1, config1)
    records1 = eval(f'tester1.{args1.mode}')()
    average1, sample_metric1 = process_records(records1, args1.evaluate_metric)

    tester2 = Tester(args2, config2)
    records2 = eval(f'tester2.{args2.mode}')()
    average2, sample_metric2 = process_records(records2, args2.evaluate_metric)

    if mode == 'ttest':
        statistic, p_value = stats.ttest_rel(sample_metric1, sample_metric2)
    elif mode == 'fisher':
        correct1 = sample_metric1.count(True)
        correct2 = sample_metric2.count(True)
        contingency_table = [[correct1, correct2],
                             [
                                 len(sample_metric1) - correct1,
                                 len(sample_metric2) - correct2
                             ]]
        statistic, p_value = stats.fisher_exact(contingency_table)
    elif mode == 'mcnemar':
        correct1 = sample_metric1.count(True)
        correct2 = sample_metric2.count(True)
        contingency_table = [[correct1, correct2],
                             [
                                 len(sample_metric1) - correct1,
                                 len(sample_metric2) - correct2
                             ]]
        b = mcnemar(contingency_table, exact=True)
        statistic, p_value = b.statistic, b.pvalue
    else:
        raise NotImplementedError

    print(
        f'[Runner] - The testing scores of the two ckpts are {average1} and {average2}, respectively.'
    )
    print(
        f'[Runner] - The statistic of the significant test of the two ckpts is {statistic}'
    )
    print(
        f'[Runner] - The P value of significant test of the two ckpts is {p_value}'
    )
Example #18
def main(args):
    table = np.zeros((2, 2))
    for (gold, norm_a, norm_b) in zip(args.reffile, args.norm_a, args.norm_b):
        gold = get_norm(gold)
        norm_a = get_norm(norm_a)
        norm_b = get_norm(norm_b)
        table[int(norm_a == gold)][int(norm_b == gold)] += 1

    print(table)
    print(mcnemar(table))
Example #19
def print_p_values(A, B, GT, langset):
    # [[A & B, A & ~B], [~A & B, ~A & ~B]]
    print('LANG', 'PVALUE', sep='\t')
    for lang in langset:
        contingency = [[0, 0], [0, 0]]
        for item in GT[lang]:
            i = 0 if item in A[lang] else 1
            j = 0 if item in B[lang] else 1
            contingency[i][j] += 1
        print(lang, mcnemar(contingency).pvalue, sep='\t')
Example #20
def test_cochranq():
    """
    library(CVST)
    table1 = matrix(c(1, 0, 1, 1,
                      0, 1, 1, 1,
                      1, 1, 1, 0,
                      0, 1, 0, 0,
                      0, 1, 0, 0,
                      1, 0, 1, 0,
                      0, 1, 0, 0,
                      1, 1, 1, 1,
                      0, 1, 0, 0), ncol=4, byrow=TRUE)
    rslt1 = cochranq.test(table1)
    table2 = matrix(c(0, 0, 1, 1, 0,
                      0, 1, 0, 1, 0,
                      0, 1, 1, 0, 1,
                      1, 0, 0, 0, 1,
                      1, 1, 0, 0, 0,
                      1, 0, 1, 0, 0,
                      0, 1, 0, 0, 0,
                      0, 0, 1, 1, 0,
                      0, 0, 0, 0, 0), ncol=5, byrow=TRUE)
    rslt2 = cochranq.test(table2)
    """

    table = [[1, 0, 1, 1], [0, 1, 1, 1], [1, 1, 1, 0], [0, 1, 0, 0],
             [0, 1, 0, 0], [1, 0, 1, 0], [0, 1, 0, 0], [1, 1, 1, 1],
             [0, 1, 0, 0]]
    table = np.asarray(table)

    stat, pvalue, df = ctab.cochrans_q(table, return_object=False)
    assert_allclose(stat, 4.2)
    assert_allclose(df, 3)

    table = [[0, 0, 1, 1, 0], [0, 1, 0, 1, 0], [0, 1, 1, 0,
                                                1], [1, 0, 0, 0, 1],
             [1, 1, 0, 0, 0], [1, 0, 1, 0, 0], [0, 1, 0, 0, 0],
             [0, 0, 1, 1, 0], [0, 0, 0, 0, 0]]
    table = np.asarray(table)

    stat, pvalue, df = ctab.cochrans_q(table, return_object=False)
    assert_allclose(stat, 1.2174, rtol=1e-4)
    assert_allclose(df, 4)

    # Cochran's q and Mcnemar are equivalent for 2x2 tables
    data = table[:, 0:2]
    xtab = np.asarray(pd.crosstab(data[:, 0], data[:, 1]))
    b1 = ctab.cochrans_q(data, return_object=True)
    b2 = ctab.mcnemar(xtab, exact=False, correction=False)
    assert_allclose(b1.statistic, b2.statistic)
    assert_allclose(b1.pvalue, b2.pvalue)

    # Test for printing bunch
    assert_equal(str(b1).startswith("df          1\npvalue      0.65"), True)
Example #21
def main():
    if not (os.path.exists("/home/xvpher/Intern_Project/Dataset/spamdata.csv")
            ):
        print("data file not found")
        return
    df = pd.read_csv("/home/xvpher/Intern_Project/Dataset/spamdata.csv")
    col_names = df.columns
    col_names = col_names[0:len(col_names) - 1]
    features = df.loc[:, col_names].values
    labels = df.loc[:, 'spam'].values
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        features, labels, test_size=0.25, shuffle=True, random_state=3)
    models = []
    names = []
    models.append(
        ('LR', LogisticRegression(solver='lbfgs', max_iter=2000, tol=0.0001)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('DTC', DecisionTreeClassifier()))
    models.append(('KNC', KNeighborsClassifier()))
    models.append(('MNB', MultinomialNB()))
    models.append(('RFC', RandomForestClassifier(n_estimators=100)))
    models.append(('SVC', SVC(gamma='scale', kernel='rbf')))

    predictions = pd.DataFrame(data=y_test, columns=['y_test'])
    for name, model in models:
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        predictions[name] = pred
        names.append(name)

    for k in range(7):
        for j in range(k + 1, 7):
            table = numpy.zeros((2, 2), dtype=numpy.int64)
            for i in range(len(y_test)):
                a = int(not (predictions.loc[i, 'y_test']
                             ^ predictions.iloc[i, k + 1]))
                b = int(not (predictions.loc[i, 'y_test']
                             ^ predictions.iloc[i, j + 1]))
                # predictions.loc[i,'score1'] = a
                # predictions.loc[i,'score2'] = b
                if (a == 1 and b == 1):
                    table[0][0] += 1
                elif (a == 1 and b == 0):
                    table[0][1] += 1
                elif (a == 0 and b == 1):
                    table[1][0] += 1
                else:
                    table[1][1] += 1

            score = mcnemar(table, exact=False)
            print("-------({},{})--------".format(names[k], names[j]))
            print(table)
            print(score)
def test_mcnemar_chisquare():
    f_obs1 = np.array([[101, 121], [59, 33]])
    f_obs2 = np.array([[101, 70], [59, 33]])
    f_obs3 = np.array([[101, 80], [59, 33]])

    #> mcn = mcnemar.test(matrix(c(101, 121,  59,  33),nrow=2))
    res1 = [2.067222e01, 5.450095e-06]
    res2 = [0.7751938, 0.3786151]
    res3 = [2.87769784, 0.08981434]

    stat = mcnemar(f_obs1, exact=False)
    assert_allclose([stat.statistic, stat.pvalue], res1, rtol=1e-6)
    stat = mcnemar(f_obs2, exact=False)
    assert_allclose([stat.statistic, stat.pvalue], res2, rtol=1e-6)
    stat = mcnemar(f_obs3, exact=False)
    assert_allclose([stat.statistic, stat.pvalue], res3, rtol=1e-6)

    # test correction = False
    res1 = [2.135556e01, 3.815136e-06]
    res2 = [0.9379845, 0.3327967]
    res3 = [3.17266187, 0.07488031]

    res = mcnemar(f_obs1, exact=False, correction=False)
    assert_allclose([res.statistic, res.pvalue], res1, rtol=1e-6)
    res = mcnemar(f_obs2, exact=False, correction=False)
    assert_allclose([res.statistic, res.pvalue], res2, rtol=1e-6)
    res = mcnemar(f_obs3, exact=False, correction=False)
    assert_allclose([res.statistic, res.pvalue], res3, rtol=1e-6)
def test_mcnemar_exact():
    f_obs1 = np.array([[101, 121], [59, 33]])
    f_obs2 = np.array([[101, 70], [59, 33]])
    f_obs3 = np.array([[101, 80], [59, 33]])
    f_obs4 = np.array([[101, 30], [60, 33]])
    f_obs5 = np.array([[101, 10], [30, 33]])
    f_obs6 = np.array([[101, 10], [10, 33]])

    #vassar college online computation
    res1 = 0.000004
    res2 = 0.378688
    res3 = 0.089452
    res4 = 0.00206
    res5 = 0.002221
    res6 = 1.
    stat = mcnemar(f_obs1, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [59, res1], decimal=6)
    stat = mcnemar(f_obs2, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [59, res2], decimal=6)
    stat = mcnemar(f_obs3, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [59, res3], decimal=6)
    stat = mcnemar(f_obs4, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [30, res4], decimal=6)
    stat = mcnemar(f_obs5, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [10, res5], decimal=6)
    stat = mcnemar(f_obs6, exact=True)
    assert_almost_equal([stat.statistic, stat.pvalue], [10, res6], decimal=6)
Example #26
def do_mcnemar(fs1, fs2):
    path = './output/'

    pin1 = os.path.join(path + 'SVM_gold_versus_pred_30d_' + fs1 + '.pickle')
    pin2 = os.path.join(path + 'SVM_gold_versus_pred_30d_' + fs2 + '.pickle')

    df1 = pickle.load(open(pin1, 'rb'))
    df2 = pickle.load(open(pin2, 'rb'))

    df1['correct'] = df1['gold_' + fs1] == df1['pred_' + fs1]
    df2['correct'] = df2['gold_' + fs2] == df2['pred_' + fs2]

    table = np.zeros((2,2))

    """
                     | c2_correct | c2_incorrect
        c1_correct   |            |
        c1_incorrect |            |
    """

    for i in range(len(df1)):
        v1 = df1.iloc[i].correct
        v2 = df2.iloc[i].correct

        # c1_correct, c2_incorrect
        if v1 and not v2:
            table[0, 1] += 1
        # c1_correct, c2_correct
        if v1 and v2:
            table[0, 0] += 1
        # c1_incorrect, c2_correct
        if not v1 and v2:
            table[1, 0] += 1
        # c1_incorrect, c2_incorrect
        if not v1 and not v2:
            table[1, 1] += 1

    result = mcnemar(table, exact=True)

    # summarize the finding
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0) for', fs1, fs2)
    else:
        print('Different proportions of errors (reject H0) for', fs1, fs2)

    return fs1, fs2, alpha, result.pvalue
Example #27
def run_mcnemar(baseline_pred, experiment_pred, y_test):
    """ McNemar's Test (Significance) 
    http://www.atyun.com/25532.html
    It is a statistical evaluation of paired nominal data or classifiers.
    There are totally 2 tests constucting 2x2 contingency tables.
        /    | Test2 Pos    Test2 Neg | 
    Test1 Pos|                        |
    Test1 Neg|                        |

    Eg.                       Before grad
                        w. girl         w/o girl
    After    w. girl|     5(A)            18(B)
    Grad    w/o girl|     5(C)            22(D)
    H0 => The number of grads with girl is the same as that after grad.
    Ha => The number of grads with girl is the diff from that after grad.
    Chi-Square Distribution

    It wants to know whether two distributions are different only because of the random noise (Null hypothesis).
    """

    a = 0  # Both correct
    b = 0  # Baseline correct, experiment incorrect
    c = 0  # Baseline incorrect, experiment correct
    d = 0  # Both incorrect
    for b_pred, ex_pred, true in zip(baseline_pred, experiment_pred, y_test):
        if b_pred == true and ex_pred == true:
            a += 1
        elif b_pred == true and ex_pred != true:
            b += 1
        elif b_pred != true and ex_pred == true:
            c += 1
        else:
            d += 1

    table = [[a, b], [c, d]]

    # calculate McNemar's test
    result = mcnemar(table, exact=False, correction=False)
    # summarize the finding
    #print('statistic=%.3f, p-value=%.6f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue > alpha:
        print('Same proportions of errors (fail to reject H0)')
    else:
        print('Different proportions of errors (reject H0)')

    return result
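A worked check of the grad/girlfriend table from the docstring above, using the illustrative counts A=5, B=18, C=5, D=22 (a sketch, not part of the original code):

from statsmodels.stats.contingency_tables import mcnemar

table = [[5, 18], [5, 22]]
result = mcnemar(table, exact=False, correction=False)
print(result.statistic, result.pvalue)   # (18 - 5)**2 / (18 + 5) ~= 7.35, p < 0.05 -> reject H0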
def perform_mcnemar_test(qid_to_agreements1, qid_to_agreements2):
    table = np.zeros((2, 2))
    qids = list(qid_to_agreements1.keys())
    for qid in qids:
        for _agree1, _agree2 in zip(qid_to_agreements1[qid], qid_to_agreements2[qid]):
            if _agree1 > 0 and _agree2 > 0:
                table[0][0] += 1
            elif _agree1 > _agree2:
                table[0][1] += 1
            elif _agree2 > _agree1:
                table[1][0] += 1
            else:
                table[1][1] += 1
    results = mcnemar(table, exact=False, correction=True)
    return table, results.pvalue
    def mcnemar(self, Features=None, Clstrs=None):
        """
            McNemar test of homogeneity.

            Parameters
            ----------
            Features: 2D_array_like
                The arrays must have the same shape, except in the dimension
            Clstrs: array_like

            Returns
            ----------
            statistic: float or int, array
                The test statistic is the chisquare statistic if exact is false.
                If the exact binomial distribution is used, then this contains the min(n1, n2),
                where n1, n2 are cases that are zero in one sample but one in the other sample.
            pvalue: float or array
                p-value of the null hypothesis of equal marginal distributions.

            Notes
            ----------
            This is a special case of Cochran’s Q test, and of the homogeneity test.
            The results when the chisquare distribution is used are identical, except for continuity correction.
        """
        if Features is None:
            Features = self.__data.columns[:-1].copy()

        if Clstrs is None:
            Clstrs = self.__data["Clusters"].copy()
            Clstrs = Clstrs.dropna().unique().tolist()

        Clstrs.sort()

        for feature in Features:
            print("\n\n", feature,"\n")
            sub = self.__data[[feature, "Clusters"]].copy()
            sub = sub.dropna()
            sub_df = []
            for cluster in Clstrs:
                sub_df.append(sub[sub["Clusters"] == cluster][feature].values.tolist())
            for i, data_i in enumerate(sub_df):
                for j in range(i+1, len(sub_df)):
                    table = [sub_df[i], sub_df[j]]
                    res = contingency_tables.mcnemar(table)
                    if res.pvalue < 0.05:
                        print("The feature", feature, "is significant for clusters", Clstrs[i], "and", Clstrs[j])
                    else:
                        print("The feature", feature, "is not significant for clusters", Clstrs[i], "and", Clstrs[j])
Example #30
def reject_null(misclf, total_examples, clf_name1, clf_name2):
    size = intersection_size(misclf[clf_name1], misclf[clf_name2])
    a = [[0, 0], [0, 0]]
    # misclassified by both
    a[0][0] = size
    # misclassified by A
    a[0][1] = len(misclf[clf_name1]) - size
    # misclassified by B
    a[1][0] = len(misclf[clf_name2]) - size
    # not misclassified by A or B
    a[1][1] = total_examples - a[0][0] - a[0][1] - a[1][0]
    result = mcnemar(a, exact=True)
    #print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
    #alpha = 0.05
    print(clf_name1 + " v " + clf_name2 + " " + str(result.pvalue))
    return result.pvalue
Example #31
def run_mcnemar_test(report_df: pd.DataFrame) -> Tuple[float, float]:
    mask_correct_0 = report_df.loc[:, "isCorrect_0"]
    mask_correct_1 = report_df.loc[:, "isCorrect_1"]
    contingency_table = (
        (
            (mask_correct_0 & mask_correct_1).sum(),
            (mask_correct_0 & ~mask_correct_1).sum(),
        ),
        (
            (~mask_correct_0 & mask_correct_1).sum(),
            (~mask_correct_0 & ~mask_correct_1).sum(),
        ),
    )
    result = mcnemar(contingency_table)

    return result.statistic, result.pvalue
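A usage sketch for `run_mcnemar_test`, with a hypothetical report DataFrame of per-sample correctness flags; the `Tuple` annotation and the `mcnemar` import are assumed to come from the original module header:

import pandas as pd

report_df = pd.DataFrame({
    'isCorrect_0': [True, True, False, True, False, True, True, False],
    'isCorrect_1': [True, False, False, True, True, True, False, False],
})
statistic, pvalue = run_mcnemar_test(report_df)
print(statistic, pvalue)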
Example #32
def mc_nemar(y_rbf, y_linear):

    table = pd.crosstab(pd.Series(y_rbf),
                        pd.Series(y_linear),
                        rownames=["y_rbf"],
                        colnames=["y_linear"],
                        dropna=False)  # pd.crosstab also accepts arrays here
    # calculate mcnemar test
    result = mcnemar(table, exact=True)
    # summarize the finding
    print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
    # interpret the p-value
    alpha = 0.05
    if result.pvalue >= alpha:
        return 0
    else:
        return 1
    def mcnemar_quotes(self):
        """ Run McNemar test on quotes. """

        a = 0  # Both correct
        b = 0  # Baseline correct, experiment incorrect
        c = 0  # Baseline incorrect, experiment correct
        d = 0  # Both incorrect
        for baseline_quote, experimental_quote, gold_quote in zip(
                self.ordered_predictions['quotes']['baseline'],
                self.ordered_predictions['quotes']['experimental'],
                self.ordered_predictions['quotes']['gold']):
            if utils.characters_match(
                    baseline_quote.speaker,
                    gold_quote.speaker) and utils.characters_match(
                        experimental_quote.speaker, gold_quote.speaker):
                a += 1
            elif utils.characters_match(
                    baseline_quote.speaker,
                    gold_quote.speaker) and not utils.characters_match(
                        experimental_quote.speaker, gold_quote.speaker):
                b += 1
            elif not utils.characters_match(
                    baseline_quote.speaker,
                    gold_quote.speaker) and utils.characters_match(
                        experimental_quote.speaker, gold_quote.speaker):
                c += 1
            else:
                d += 1

        table = [[a, b], [c, d]]

        # calculate McNemar's test
        result = mcnemar(table, correction=False)
        # summarize the finding
        print('statistic=%.3f, p-value=%.6f' %
              (result.statistic, result.pvalue))
        # interpret the p-value
        alpha = 0.05
        if result.pvalue > alpha:
            print('Same proportions of errors (fail to reject H0)')
        else:
            print('Different proportions of errors (reject H0)')

        return result