def test_mannwhithney(predfile1, predfile2, testfile, testfile2):
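    """Compare two prediction files with McNemar's test (despite the
    Mann-Whitney name, the statistics below are McNemar tests), both for the
    classification labels and for the precision-related labels returned by
    evaluate()."""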
    y_true1, y_pred1, y_true_prec1, y_pred_prec1 = evaluate(
        testfile, predfile1)
    y_true2, y_pred2, y_true_prec2, y_pred_prec2 = evaluate(
        testfile2, predfile2)
    print('\n First model: ', predfile1)
    print('Ex: ', y_pred1[:10], ' Len: ', len(y_pred1))
    print('Second model: ', predfile2)
    print('Ex: ', y_pred2[:10], ' Len: ', len(y_pred2))
    print(
        'Is testset the same? ',
        len([
            i for i in np.equal(np.array(y_true1), np.array(y_true2))
            if not i
        ]))

    mc_tb = mcnemar_table(y_target=np.array(y_true1),
                          y_model1=np.array(y_pred1),
                          y_model2=np.array(y_pred2))
    print('Contingency table: ', mc_tb)
    mcnemar_res = mcnemar(mc_tb)
    print('McNemar:  p value: {:.20f}'.format(mcnemar_res.pvalue))
    chi2, p = mlx_mcnemar(ary=mc_tb, corrected=True)
    print('McNemar: chi:{:.4f}  p value: {}'.format(chi2, p))
    mc_tb_prec = mcnemar_table(y_target=np.array(y_true_prec1),
                               y_model1=np.array(y_pred_prec1),
                               y_model2=np.array(y_pred_prec2))
    mcnemar_res_prec = mcnemar(mc_tb_prec)
    print('McNemar PRECISION:  p value: {}'.format(mcnemar_res_prec.pvalue))
def statistics():
    # RUN combined_classify_to_csv() FIRST!

    # Get normpneum inceptionv3 model predictions
    csv_path = normpneum_bin_file_dir + '_incv3.csv'
    normpneum_res = pd.read_csv(csv_path, header=None).to_numpy()
    normpneum_incv3_class_preds = normpneum_res[:, 3]

    # Get normpneum resnetv2 model predictions
    csv_path = normpneum_bin_file_dir + '_resnetv2.csv'
    normpneum_res = pd.read_csv(csv_path, header=None).to_numpy()
    normpneum_resnetv2_class_preds = normpneum_res[:, 3]

    # Get the test labels
    normpneum_test = np.argmax(normpneum_test_labels, axis=-1)

    # Contingency Table
    tb = mcnemar_table(y_target=normpneum_test,
                       y_model1=normpneum_incv3_class_preds,
                       y_model2=normpneum_resnetv2_class_preds)
    print(tb)

    # McNemar's test
    chi2, p = mcnemar(ary=tb, corrected=True)
    print('chi-squared:', chi2)
    print('p-value:', p)

    accuracy_normpneum_incv3 = accuracy_score(normpneum_test,
                                              normpneum_incv3_class_preds)
    accuracy_normpneum_resnetv2 = accuracy_score(
        normpneum_test, normpneum_resnetv2_class_preds)

    print(f"Test accuracy normpneum incv3: {accuracy_normpneum_incv3}")
    print(
        f"Test accuracy normpneum incresnetv2: {accuracy_normpneum_resnetv2}")
Example #3
def test_compare_to_mcnemar_on_2_models():

    y_true = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                       0, 0, 0, 0, 0])

    ym1 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0])

    ym2 = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0])

    q, p = cochrans_q(y_true, ym1, ym2)

    mcn_q, mcn_p = mcnemar(mcnemar_table(y_true, ym1, ym2),
                           corrected=False,
                           exact=False)

    assert q == mcn_q
    assert p == mcn_p
Example #4
def run_mcnemar_test(y_test, model_1_class_predictions,
                     model_2_class_predictions, model_1_name, model_2_name):
    """
    Runs the McNemar test to determine if there is a statistically significant difference in the class predictions.
    Writes the results and associated contingency table locally.

    :param y_test: y_test series
    :param model_1_class_predictions: class predictions from model 1
    :param model_2_class_predictions: class predictions from model 2
    :param model_1_name: name of the first model
    :param model_2_name: name of the second model
    """
    results_table = mcnemar_table(y_target=y_test,
                                  y_model1=model_1_class_predictions,
                                  y_model2=model_2_class_predictions)
    chi2, p = mcnemar(ary=results_table, corrected=True)
    pd.DataFrame({
        'chi2': [chi2],
        'p': [p]
    }).to_csv(os.path.join(f'{model_1_name}_{model_2_name}_mcnemar_test.csv'))
    board = checkerboard_plot(
        results_table,
        figsize=(6, 6),
        fmt='%d',
        col_labels=[f'{model_2_name} wrong', f'{model_2_name} right'],
        row_labels=[f'{model_1_name} wrong', f'{model_1_name} right'])
    plt.tight_layout()
    plt.savefig(
        os.path.join('modeling', 'comparison_files',
                     f'{model_1_name}_{model_2_name}_mcnemar_test.png'))
    plt.clf()
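A minimal usage sketch for the helper above (arrays and model names are made up, not from the original project; the modeling/comparison_files directory is assumed to exist so plt.savefig can write the figure):

import numpy as np

y_test = np.array([0, 1, 1, 0, 1, 0, 1, 1])
preds_a = np.array([0, 1, 0, 0, 1, 0, 1, 1])  # hypothetical model A predictions
preds_b = np.array([0, 1, 1, 0, 0, 1, 1, 1])  # hypothetical model B predictions
run_mcnemar_test(y_test, preds_a, preds_b, 'model_a', 'model_b')
# expected side effects: model_a_model_b_mcnemar_test.csv with chi2 and p,
# plus modeling/comparison_files/model_a_model_b_mcnemar_test.png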
Example #6
def test_input_binary_all_right():
    y_target = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_model1 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_model2 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
    expect = np.array([[8, 0], [0, 0]])
    np.testing.assert_array_equal(tb, expect)
Example #7
def summarize_feature_comparisons(
        base_clf: BaseEstimator, comparison_clfs: Dict[str, BaseEstimator], X_test, y_test
):
    from mlxtend.evaluate import mcnemar, cochrans_q, mcnemar_table

    summary_dict = collections.OrderedDict()
    mcnemar_tbs = dict()

    # create list of predicted values
    base_y_predict = base_clf.predict(X_test)
    y_predictions = [base_y_predict]
    for name, clf in comparison_clfs.items():
        # get the probability
        y_predict_proba = clf.predict_proba(X_test)
        y_predict = clf.predict(X_test)

        # form mcnemar tables against base classifier
        tb = mcnemar_table(y_test, base_y_predict, y_predict)
        mcnemar_tbs[f"base vs {name}"] = tb.values()

        # store predictions per classifier
        y_predictions.append(y_predict)

    # first run cochrans Q test
    qstat, pval = cochrans_q(y_test, *y_predictions)
    summary_dict["cochrans_q"] = qstat
    summary_dict["cochrans_q_pval"] = pval

    # run mcnemars test against all the predictions
    for name, table in mcnemar_tbs.items():
        chi2stat, pval = mcnemar(table, exact=True)
        summary_dict[f"mcnemar_{name}_chi2stat"] = chi2stat
        summary_dict[f"mcnemar_{name}_pval"] = pval

    return summary_dict
Example #8
def test_input_binary():
    y_target = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    y_model1 = np.array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0])
    y_model2 = np.array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
    tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
    expect = np.array([[4, 1], [2, 3]])

    np.testing.assert_array_equal(tb, expect)
Example #9
def test_input_binary_all_right():
    y_target = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_model1 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_model2 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    tb = mcnemar_table(y_target=y_target,
                       y_model1=y_model1,
                       y_model2=y_model2)
    expect = np.array([[8, 0],
                       [0, 0]])
    np.testing.assert_array_equal(tb, expect)
Example #11
def test_input_nonbinary():
    y_target = np.array([0, 0, 0, 0, 0, 2, 1, 1, 1, 1])
    y_model1 = np.array([0, 5, 0, 0, 0, 2, 1, 0, 0, 0])
    y_model2 = np.array([0, 0, 1, 3, 0, 2, 1, 0, 0, 0])
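    # Labels need not be binary here: mcnemar_table only records whether each
    # model's prediction matches y_target, so the table keeps the same 2x2
    # right/wrong layout as in the binary examples above.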

    tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
    expect = np.array([[4, 1], [2, 3]])

    np.testing.assert_array_equal(tb, expect)
Example #12
def mcNemar(target, model1, model2):
    y_target = np.array(target)
    # Class labels predicted by model 1
    y_model1 = np.array(model1)
    # Class labels predicted by model 2
    y_model2 = np.array(model2)
    tb = mcnemar_table(y_target=y_target, y_model1=y_model1, y_model2=y_model2)
    #print (tb)
    return tb
Example #13
def svm_p_value(trainData,testData, input_pred):
    svc = LinearSVC(max_iter = 10000, verbose=50, C= 0.1)
    train_x = np.array(list(trainData['player_array']))
    train_y = np.array(list(trainData['win']))
    test_x = np.array(list(testData['player_array']))
    test_y = np.array(list(testData['win']))
    
    svc.fit(train_x, train_y)

    test_pred = svc.predict(test_x)
    
    tb = mcnemar_table(y_target=test_y, y_model1=input_pred, y_model2=test_pred)
    chi2, p = mcnemar(ary=tb, corrected=True)
           
    return p
Example #14
def mlp_p_value(trainData,testData, input_pred):
    ann = MLPClassifier(verbose=True, max_iter= 500,tol= 0.0005, solver= 'adam', alpha= 0.0001, activation= 'logistic', hidden_layer_sizes = (50,40))
    train_x = np.array(list(trainData['player_array']))
    train_y = np.array(list(trainData['win']))
    test_x = np.array(list(testData['player_array']))
    test_y = np.array(list(testData['win']))
    
    ann.fit(train_x, train_y)

    test_pred = ann.predict(test_x)
    
    tb = mcnemar_table(y_target=test_y, y_model1=input_pred, y_model2=test_pred)
    chi2, p = mcnemar(ary=tb, corrected=True)
           
    return p
Example #15
def log_p_value(trainData,testData, input_pred):
    log = LogR(max_iter=500, solver='newton-cg', C=0.1)
    
    train_x = np.array(list(trainData['player_array']))
    train_y = np.array(list(trainData['win']))
    test_x = np.array(list(testData['player_array']))
    test_y = np.array(list(testData['win']))
    
    
    log.fit(train_x, train_y)

    test_pred = log.predict(test_x)
    
    tb = mcnemar_table(y_target=test_y, y_model1=input_pred, y_model2=test_pred)
    chi2, p = mcnemar(ary=tb, corrected=True)
           
    return p
Example #16
    def mcnemar_test(target, model_1_pred, model_2_pred):
        """
        Calculates p-value of the mcnemar test
        It builds a contingency table and uses that to calculate the p-value
        :param target: a numpy array that has the actual target values
        :param model_1_pred: a numpy array that contains values based on prediction of model 1
        :param model_2_pred: a numpy array that contains values based on prediction of model 2
        :return p_value: the probability calculated under the chi-squared distribution
        """
        mc_table = mcnemar_table(y_target=target, y_model1=model_1_pred, y_model2=model_2_pred)

        n = mc_table[0, 1] + mc_table[1, 0]
        # if the sum b + c is less than 25, we should use the binomial (exact)
        # distribution instead of the chi-squared approximation.
        # See https://en.wikipedia.org/wiki/McNemar%27s_test
        binomial = n < 25
        _, p_value = mcnemar(ary=mc_table, exact=binomial)

        return p_value
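A small worked sketch of the branch above (made-up arrays; assumes the method is reachable as a plain function or staticmethod): with only a handful of discordant predictions, b + c < 25, so the exact binomial form of McNemar's test is used instead of the chi-squared approximation.

import numpy as np

target = np.array([1, 1, 1, 0, 0, 0, 1, 0])
pred_1 = np.array([1, 1, 0, 0, 0, 0, 1, 0])  # hypothetical model 1 predictions
pred_2 = np.array([1, 0, 1, 0, 1, 0, 1, 0])  # hypothetical model 2 predictions
print(mcnemar_test(target, pred_1, pred_2))  # b + c = 3 here, so exact test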
Example #17
def main(file_1, file_2):
    a, b, c, d = 0, 0, 0, 0
    nb_lines = 0
    y_ground, y_1, y_2 = [], [], []
    with open(file_1) as f1, open(file_2) as f2:
        for line_1, line_2 in zip(f1, f2):
            ground_1, pred_1 = map(int, line_1.strip().split()[1:])
            ground_2, pred_2 = map(int, line_2.strip().split()[1:])
            if ground_1 != ground_2:
                logger.error('Files do not belong to the same dataset')
                sys.exit(1)

            y_ground.append(ground_1)
            y_1.append(pred_1)
            y_2.append(pred_2)
            if pred_1 == ground_1:
                if pred_2 == ground_1:
                    a += 1
                else:
                    b += 1
            else:
                if pred_2 == ground_1:
                    c += 1
                else:
                    d += 1
            nb_lines += 1
    logger.info('Loaded {} lines..'.format(nb_lines))
    logger.info('| {} | {} |'.format(a, b))
    logger.info('| {} | {} |'.format(c, d))

    y_ground = np.array(y_ground)
    y_1 = np.array(y_1)
    y_2 = np.array(y_2)

    tb = mcnemar_table(y_target=y_ground, y_model1=y_1, y_model2=y_2)
    logger.info('\n {}'.format(tb))

    chi2, p = mcnemar(ary=tb, corrected=True)
    logger.info('chi-squared: {}'.format(chi2))
    logger.info('p-value: {}'.format(p))
def stat_test(df, classifier1, classifier2):
    x = df['Cleaned'].values
    y = df['Class'].values

    # split dataset into training and test sets, with 80:20 split
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1000,
                                                        stratify=y)

    # vectorizer for first classifier
    # vectorizer = CountVectorizer()
    vectorizer = TfidfVectorizer()

    vectorizer.fit(x_train)
    X_test = vectorizer.transform(x_test)

    y_pred_1 = classifier1.predict(X_test)

    # vectorizer for second classifier
    # vectorizer = CountVectorizer()
    vectorizer = TfidfVectorizer()

    vectorizer.fit(x_train)
    X_test = vectorizer.transform(x_test)

    y_pred_2 = classifier2.predict(X_test)

    contingency_table = mcnemar_table(y_target=y_test,
                                      y_model1=y_pred_1,
                                      y_model2=y_pred_2)

    print(contingency_table)

    chi2, p_val = mcnemar(ary=contingency_table, corrected=True)
    print('chi-squared:', chi2)
    print('p-value:', p_val)
Example #19
def compute_stat_sig(systems_data, measure):
    significance = defaultdict(list)
    for system in ["systemNoFcNoFs", "systemNoFc", "systemFcPost", "sota"]:
        for other_system in ["sota", "human"]:
            if system == other_system:
                continue
            sys_data = [x[measure] for x in systems_data[system]]
            other_sys_data = [x[measure] for x in systems_data[other_system]]
            true_data = [1] * len(sys_data)

            tb_b = mcnemar_table(y_target=np.array(true_data),
                                 y_model1=np.array(sys_data),
                                 y_model2=np.array(other_sys_data))

            chi2, p_value = mcnemar(ary=tb_b, corrected=True)
            print(tb_b)
            print(
                f"mcnemar {system},{other_system}: chi2: {chi2}, p-value {p_value}"
            )
            if p_value <= 0.05 and p_value >= 0:
                significance[system].append(other_system[0])
        significance[system] = ",".join(significance[system])
    return significance
Example #20
                      df_result['bi_F1'].to_numpy(),
                      df_result['unibi_F1'].to_numpy())
    print("ANNOVA F1 : %0.5f, %0.5f" % result)

    # Cochran's Q analysis
    y_uni = sr_uni.to_numpy()
    y_bi = sr_bi.to_numpy()
    y_unibi = sr_unibi.to_numpy()
    q, p_value = cochrans_q(y, y_uni, y_bi, y_unibi)
    print("COHRAN Q-Test: q: %0.5f, p_value: %0.5f" % (q, p_value))

    l_grams = ['uni', 'bi', 'unibi']
    l_rslt = [y_uni, y_bi, y_unibi]
    l_pair = list(zip(l_grams, l_rslt))

    l_mcnemar_rslt = []
    for i, t0 in enumerate(l_pair):
        for j, t1 in enumerate(l_pair[i + 1:]):
            k0 = t0[0]
            k1 = t1[0]
            v0 = t0[1]
            v1 = t1[1]

            tb = mcnemar_table(y_target=y, y_model1=v0, y_model2=v1)
            chi2, p = mcnemar(ary=tb, corrected=True)
            l_mcnemar_rslt.append("{chi2:.5f}".format(chi2=chi2))
            l_mcnemar_rslt.append("{p:.5f}".format(p=p))
            print(f"McNemar %s v %s:  chi2 : %0.5f, p_value: %0.5f" %
                  (k0, k1, chi2, p))
    print(" ".join(l_mcnemar_rslt))
Example #21
def train():
    data_x1, data_x2, data_y = load_tensors()

    sizedata = len(data_x1)
    print("Data of size:", sizedata)
    print("Data 2 of size:", len(data_x2))
    # Split dataset into 5 sub-datasets
    splitted_x1 = list(split(data_x1, 5))
    splitted_x2 = list(split(data_x2, 5))
    splitted_y = list(split(data_y, 5))
    print("Available GPU :", torch.cuda.is_available())
    torch.cuda.set_device(0)
    k = ARGS.kFold

    # Prepare array of scores
    precision_list = []
    recall_list = []
    # valloss_list = []
    AUC_list = []
    for ind_i in range(0, k):
        # Prepare X_train Y_train X_test Y_test
        X_test1 = splitted_x1[ind_i]
        X_test2 = splitted_x2[ind_i]
        Y_test = splitted_y[ind_i]
        # Deep copy, otherwise iteration problem
        copysplitX1 = list(splitted_x1)
        copysplitX2 = list(splitted_x2)
        copysplitY = list(splitted_y)
        del copysplitX1[ind_i]
        del copysplitX2[ind_i]
        del copysplitY[ind_i]
        X_train1 = copysplitX1  # CUI + CCS
        X_train2 = copysplitX2  # CUI only
        Y_train = copysplitY
        modelCUI = Network(0).cuda()
        modelCCS = Network(1).cuda()
        # XAVIER Init
        modelCUI.apply(init_weights)
        modelCCS.apply(init_weights)
        with torch.cuda.device(0):
            # Hyperparameters :
            epochs = ARGS.nEpochs
            batchsize = ARGS.batchSize
            learning_rate = ARGS.lr
            log_interval = 2
            criterion = nn.BCEWithLogitsLoss()
            # criterion = nn.BCELoss()
            # criterion = nn.CrossEntropyLoss()
            optimizer1 = optim.SGD(modelCUI.parameters(), lr=learning_rate)
            optimizer2 = optim.SGD(modelCCS.parameters(), lr=learning_rate)
            # optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            # Train loader
            numplist = np.array(X_train2)
            arrX = np.concatenate(numplist).tolist()
            tensor_x = torch.Tensor(arrX).cuda()
            numplist = np.array(Y_train)
            arrY = np.concatenate(numplist).tolist()
            tensor_y = torch.Tensor(arrY).cuda()
            print("Shape X:", np.shape(arrX), "Shape Y:", np.shape(arrY))
            dataset = dt.TensorDataset(tensor_x,
                                       tensor_y)  # create your dataset
            train_loader1 = dt.DataLoader(dataset,
                                          batch_size=batchsize,
                                          shuffle=True)

            numplist = np.array(X_train1)
            arrX = np.concatenate(numplist).tolist()
            tensor_x = torch.Tensor(arrX).cuda()
            dataset = dt.TensorDataset(tensor_x, tensor_y)
            train_loader2 = dt.DataLoader(dataset,
                                          batch_size=batchsize,
                                          shuffle=True)

            # Test loader
            tensor_x = torch.Tensor(
                np.array(X_test2).tolist()).cuda()  # transform to torch tensor
            tensor_y = torch.Tensor(np.array(Y_test).tolist()).cuda()
            dataset = dt.TensorDataset(tensor_x,
                                       tensor_y)  # create your dataset
            test_loader1 = dt.DataLoader(dataset,
                                         batch_size=batchsize,
                                         shuffle=False)
            tensor_x = torch.Tensor(np.array(X_test1).tolist()).cuda()
            dataset = dt.TensorDataset(tensor_x, tensor_y)
            test_loader2 = dt.DataLoader(dataset,
                                         batch_size=batchsize,
                                         shuffle=False)

            # Training model CUI
            print("Training CUI model...")
            for epoch in range(epochs):
                for batch_idx, (data, target) in enumerate(train_loader1):
                    data, target = Variable(data), Variable(target)
                    optimizer1.zero_grad()
                    net_out = modelCUI(data)
                    loss = criterion(net_out, target)
                    loss.backward()
                    optimizer1.step()
                    # if batch_idx % log_interval == 0:
                    #     print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: '.format(
                    #         epoch, batch_idx * len(data), len(train_loader1.dataset),
                    #                100. * batch_idx / len(train_loader1)))
                    #     print(loss.data)

            print("Training CUI+CCS model...")
            # Training model CUI+CCS
            for epoch in range(epochs):
                for batch_idx, (data, target) in enumerate(train_loader2):
                    data, target = Variable(data), Variable(target)
                    optimizer2.zero_grad()
                    net_out = modelCCS(data)
                    loss = criterion(net_out, target)
                    loss.backward()
                    optimizer2.step()
                    # if batch_idx % log_interval == 0:
                    #     print('Train Epoch: {} [{}/{} ({:.0f}%)]\t Loss: '.format(
                    #         epoch, batch_idx * len(data), len(train_loader2.dataset),
                    #                100. * batch_idx / len(train_loader2)))
                    #     print(loss.data)

            # Testing and save score
            total = 0
            correct = 0
            modelCUI.eval()
            modelCCS.eval()

            P = list()
            R = list()
            test_loader_list = list([test_loader1, test_loader2])
            model_list = list([modelCUI, modelCCS])
            nemarlist = list([np.array([]), np.array([])])
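            # nemarlist will hold one 0/1 correctness flag per top-30 predicted
            # code for each of the two models; together with an all-ones target
            # these vectors feed the McNemar contingency table after the recall
            # loop below.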

            # Precisions
            for model, test_loader in zip(model_list, test_loader_list):
                for i in range(1, 4):
                    for data in test_loader:
                        x, labels = data
                        outputs = model(Variable(x)).detach(
                        )  # output is a tensor of size [BATCHSIZE][ARGS.numberOfOutputCodes]
                        _, predicted = torch.topk(outputs.data, i)
                        for y_predlist, y in zip(predicted, labels):
                            for y_pred in y_predlist:
                                total += 1
                                if y[y_pred] == 1:
                                    correct += 1

                    precision = correct / total
                    P.append(precision)
                    correct = 0
                    total = 0

            for model, test_loader, mcnemar_idx in zip(model_list,
                                                       test_loader_list,
                                                       list([0, 1])):
                # Number of diagnostic for each sample (mean of 12 codes, max of 30 codes, R@10 - R@20 - R@30 seems appropriate)
                total_true_list = list()
                for data in test_loader:
                    x, labels = data
                    for y in labels:
                        total_true = 0
                        for val in y:
                            if val == 1:
                                total_true += 1
                        total_true_list.append(total_true)
                # Recalls
                for i in range(10, 40, 10):
                    total_true_list_cpy = list(total_true_list)
                    for data in test_loader:
                        x, labels = data
                        outputs = model(Variable(x)).detach()
                        _, predicted = torch.topk(outputs.data, i)
                        for y_predlist, y in zip(predicted, labels):
                            total += total_true_list_cpy.pop(0)
                            for y_pred in y_predlist:
                                if y[y_pred] == 1:
                                    correct += 1
                                    if i == 30:
                                        nemarlist[mcnemar_idx] = np.append(
                                            nemarlist[mcnemar_idx], 1)
                                else:
                                    if i == 30:
                                        if correct < total:
                                            nemarlist[mcnemar_idx] = np.append(
                                                nemarlist[mcnemar_idx], 0)
                                        else:
                                            nemarlist[mcnemar_idx] = np.append(
                                                nemarlist[mcnemar_idx], 1)
                                            # Else, there's no more diagnoses to be found, so we will not consider it as wrong

                    recall = correct / total
                    R.append(recall)
                    correct = 0
                    total = 0
                precision_list.append(P)
                recall_list.append(R)
                # AUROC
                YTRUE = None
                YPROBA = None
                for data in test_loader:
                    x, labels = data
                    x, labels = Variable(x), Variable(labels)
                    outputs = model(x).detach().cpu().numpy()
                    labels = labels.detach().cpu().numpy()
                    for batch_true, batch_prob in zip(labels, outputs):
                        YTRUE = np.concatenate(
                            (YTRUE, [batch_true]),
                            axis=0) if YTRUE is not None else [batch_true]
                        YPROBA = np.concatenate(
                            (YPROBA, [batch_prob]),
                            axis=0) if YPROBA is not None else [batch_prob]
                ROC_avg_score = roc(YTRUE,
                                    YPROBA,
                                    average='micro',
                                    multi_class='ovr')
                AUC_list.append(ROC_avg_score)

            # McNemar test
            nemar_true = np.ones(nemarlist[0].size)
            nemar_m1 = nemarlist[0]
            nemar_m2 = nemarlist[1]
            tb = mcnemar_table(y_target=nemar_true,
                               y_model1=nemar_m1,
                               y_model2=nemar_m2)
            # print("Matrix: ", tb)
            chi2, p = mcnemar(ary=tb, corrected=True)
            # print('chi-squared:', chi2)
            # print('p-value:', p)
            filesave = open("McNemar_report.txt", "a")
            filesave.write("\nMatrix: ")
            filesave.write(str(tb))
            filesave.write("\np-value and chi-squared:")
            filesave.write(str(p))
            filesave.write(" ")
            filesave.write(str(chi2))
            filesave.close()

    # Output score of each fold + average
    print("Scores for each fold:")
    print("Precision:", precision_list)
    print("Recall:", recall_list)
    print("AUROC:", AUC_list)
predictions2 = "./results/2_single_COBRE/single/predictions.npz"

# ---------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------

pred1_file = np.load(predictions1)
prediction_1 = pred1_file["y_predictions"]
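# Stack the per-fold prediction arrays into one flat vector (assuming each
# entry is 1-D, np.concatenate(prediction_1) would give the same result).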

pred1 = prediction_1[0]
for i in range(1, len(prediction_1)):
    pred1 = np.hstack((pred1, prediction_1[i]))

y_true_list = pred1_file["y_true"]

y_true = y_true_list[0]
for i in range(1, len(y_true_list)):
    y_true = np.hstack((y_true, y_true_list[i]))

pred2_file = np.load(predictions2)
prediction_2 = pred2_file["y_predictions"]

pred2 = prediction_2[0]
for i in range(1, len(prediction_2)):
    pred2 = np.hstack((pred2, prediction_2[i]))

tb = mcnemar_table(y_target=y_true, y_model1=pred1, y_model2=pred2)

chi2, p = mcnemar(ary=tb, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)
Example #23
# Predict Output
y_pred_SVM = SVM.predict(data_test[:, :-1])

# Use accuracy_score function to get the accuracy

print("SVM Accuracy Score: ",
      accuracy_score(y_pred_SVM, data_test[:, 4]) * 100)

# Comparing Classifiers - McNemar test

#pip install mlxtend
from mlxtend.evaluate import mcnemar_table

tb = mcnemar_table(y_target=data_test[:, 4],
                   y_model1=y_pred_SVM,
                   y_model2=y_pred_GNB)

print(tb)

chi_GNBC_SVM = ((abs(tb[0, 1] - tb[1, 0]) - 1)**2) / (tb[0, 1] + tb[1, 0])
print(chi_GNBC_SVM)
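# The expression above is McNemar's chi-square with continuity correction,
# (|b - c| - 1)**2 / (b + c), computed from the discordant cells tb[0, 1] and
# tb[1, 0]; it should match the statistic mlxtend's mcnemar(ary=tb,
# corrected=True) reports.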

# Comparing Classifiers - Approximate normal test

# SVM test
from sklearn.metrics import confusion_matrix
Conf_matrix_SVM = confusion_matrix(data_test[:, 4], y_pred_SVM)
print(Conf_matrix_SVM)

X = (Conf_matrix_SVM[0, 1] + Conf_matrix_SVM[1, 0])
Example #24
def score(data_folder, out_folder, task, score_folder):
    data_folder = Path(data_folder)
    out_folder = Path(out_folder)
    datasets = ["ldc", "viggo", "webnlg", "e2e"]
    systems = ["systemNoFcNoFs", "systemNoFc", "systemFcPost", "sota", "human"]
    stats = {}
    first = []
    second = []
    for dataset in datasets:

        print(f"processing {dataset}")
        systems_data = {}

        for system in systems:
            systems_data[system] = json.load(
                open(data_folder / dataset / f"{system}.json"))

        print(f"dataset: {dataset}")
        all_scored = defaultdict(list)
        score_folder = Path(score_folder)
        score_file = score_folder / task / (f"{dataset}.csv")
        total_texts = 5
        try:
            df = pd.read_csv(score_file)
        except:
            print(f"{score_file} not available.")
            continue
        scores = df.to_dict(orient="records")
        try:
            input_df = pd.read_csv(out_folder / task /
                                   (f"mturk_{dataset}.csv"))
        except:
            print(f"ignoring {dataset}")
            continue
        input_data = input_df.to_dict(orient="records")

        if task == "fidelity_annotations":
            for item in scores:
                for i in range(total_texts):
                    text = item[f"Input.text{i + 1}"]
                    index = item["Input.index"]
                    accurate = f"Answer.text{i + 1}_accurate.text{i + 1}_accurate"
                    key = f"{index}_{text}"
                    try:
                        all_scored[key].append({"accurate": item[accurate]})
                    except:
                        import ipdb
                        ipdb.set_trace()

            fidelity_scores = []

            all_ser_scores = []
            all_sfc_scores = []
            true_scores_sfc = []
            true_scores_ser = []
            sfc_data = defaultdict(list)
            ser_data = defaultdict(list)

            for x in all_scored:
                try:
                    one = all_scored[x][0]["accurate"]
                    two = all_scored[x][1]["accurate"]
                    first.append(one)
                    second.append(two)
                except:
                    pass

            for item in input_data:
                for i in range(total_texts):
                    text_i = item[f"text{i + 1}"]
                    system = item[f"system{i + 1}"]
                    index = item["index"]
                    key = f"{index}_{text_i}"

                    if key in all_scored:
                        obj = systems_data[system][index]
                        score = np.mean(
                            [int(x["accurate"]) for x in all_scored[key]])
                        # these have to be reconciled if disagreeing: take ceil or floor

                        sample_type = f'{"A_D" if obj["sfc_correct"] else "E_D"}'
                        if dataset != "ldc":
                            sample_type += f',{"A_H" if obj["ser_correct"] else "E_H"}'

                        fidelity_scores.append({
                            "ind": index,
                            "system": system,
                            "value": math.ceil(score),
                            "sample_type": sample_type,
                            "text": text_i,
                            "data": item["data"],
                            "original_text":
                                obj["original_" +
                                    dataset_fields[dataset]["text"].strip()],
                            "sfc_correct": obj["sfc_correct"],
                            "ser_correct":
                                obj["ser_correct"] if "ser_correct" in obj else "",
                        })
                        # Reconciled cases are those where the expert annotators disagreed. They discussed these and
                        # reached the following agreements
                        reconciled = {
                            "Example 1": 0,
                            "Example 2": 1,
                        }
                        if text_i in reconciled:
                            true_scores_sfc.append(reconciled[text_i])
                            true_scores_ser.append(reconciled[text_i])
                        else:
                            add_closest_score(score, true_scores_sfc,
                                              obj["sfc_correct"])
                            if dataset != "ldc":
                                add_closest_score(score, true_scores_ser,
                                                  obj["ser_correct"])

                        all_sfc_scores.append(obj["sfc_correct"])

                        sfc_data[system].append(obj["sfc_correct"])

                        if dataset != "ldc":
                            all_ser_scores.append(obj["ser_correct"])
                            ser_data[system].append(obj["ser_correct"])

            if dataset != "ldc":
                c_report = classification_report(true_scores_ser,
                                                 all_ser_scores)
                stats[f"{dataset}_ser_report"] = classification_report(
                    true_scores_ser, all_ser_scores, output_dict=True)
                print("SER")
                print(c_report)

            c_report = classification_report(true_scores_sfc, all_sfc_scores)
            stats[f"{dataset}_sfc_report"] = classification_report(
                true_scores_sfc, all_sfc_scores, output_dict=True)
            print("SFC")
            print(c_report)

            mturk_df = pd.DataFrame(fidelity_scores)

            agg_stats = mturk_df.groupby(["system"]).agg(["mean", "count"])
            print(agg_stats)
            stats[f"{dataset}_score"] = agg_stats.to_dict()[("value", "mean")]
            stats[f"{dataset}_count"] = agg_stats.to_dict()[("value", "count")]
            print(
                mturk_df.groupby(["system",
                                  "sample_type"]).agg(["mean", "count"]))

            if dataset != "ldc":
                tb_b = mcnemar_table(
                    y_target=np.array(true_scores_sfc),
                    y_model1=np.array(all_sfc_scores),
                    y_model2=np.array(all_ser_scores),
                )
                print(tb_b)
                chi2, p = mcnemar(ary=tb_b, corrected=True)
                print(f"mcnemar chi2: {chi2}, p-value {p}")

            for measure in ["sfc_correct", "ser_correct"]:
                if measure == "ser_correct" and dataset == "ldc":
                    continue
                stats[f"{dataset}_significance_{measure}"] = compute_stat_sig(
                    systems_data, system, measure)

        elif task == "fluency":

            for item in scores:
                for i in range(total_texts):
                    field = f"Input.text{i + 1}"
                    answer_field = f"Answer.fluency{i + 1}"
                    all_scored[item[field]].append(item[answer_field])

            for x in all_scored:
                all_scored[x] = {
                    "average": np.mean(all_scored[x]),
                    "count": len(all_scored[x])
                }

            fluency_scores = defaultdict(list)

            for item in input_data:
                for i in range(total_texts):
                    if item[f"text{i + 1}"] in all_scored:
                        score = all_scored[item[f"text{i + 1}"]]["average"]
                        system = item[f"system{i + 1}"]
                        fluency_scores[system].append(score)

            fluency_df_values = []
            for system in fluency_scores:
                fluency_df_values.extend([{
                    "system": system,
                    "value": fluency_scores[system][i]
                } for i in range(len(fluency_scores[system]))])

            mturk_df = pd.DataFrame(fluency_df_values)
            agg_stats = mturk_df.groupby(["system"
                                          ]).agg(["mean", "count", "median"])
            print(agg_stats)
            stats[dataset] = agg_stats.to_dict()[("value", "mean")]

            test_stats = sp.posthoc_wilcoxon(mturk_df,
                                             val_col="value",
                                             group_col="system",
                                             sort=True,
                                             zero_method="zsplit")
            print(test_stats)
            significance = defaultdict(list)
            for system in [
                    "systemNoFcNoFs", "systemNoFc", "systemFcPost", "sota"
            ]:
                for other_system in ["sota", "human"]:
                    p_value = test_stats.loc[system, other_system]
                    if p_value <= 0.05 and p_value >= 0:
                        significance[system].append(other_system[0])
                significance[system] = ",".join(significance[system])
            stats[f"{dataset}_significance"] = significance

    print(cohen_kappa_score(first, second))
    json.dump(stats, open(data_folder / f"{task}.json", "w"), indent=2)
Example #25
def main():
    #open needed files
    test_data = pd.read_csv('data/test_data.csv', encoding='ISO-8859-1')
    train_data = pd.read_csv('data/train_data.csv', encoding='ISO-8859-1')
    train_bigram = pd.read_pickle('saved_pickles_models/bigram.pkl')
    train_id2word = pd.read_pickle('saved_pickles_models/id2word.pkl')
    train_corpus = pd.read_pickle('saved_pickles_models/corpus.pkl')
    model = pd.read_pickle('saved_pickles_models/lda_model2.model')

    scaler = StandardScaler()
    test_data_list = []
    feature_vectors = []
    test_vectors = []

    #get distributions from every tweet in train_data
    print('Getting distribution...')

    for i in range(len(train_data)):
        train_top_topics = model.get_document_topics(train_corpus[i],
                                                     minimum_probability=0.0)
        train_topic_vector = [train_top_topics[i][1] for i in range(10)]
        feature_vectors.append(train_topic_vector)

    x = np.array(feature_vectors)
    y = np.array(train_data.relevant)

    kf = KFold(5, shuffle=True, random_state=42)
    log_res_train_f1, log_res_sgd_train_f1, mod_huber_train_f1 = [], [], []

    print('Starting classification algorithm calculations on training data...')
    for train_ind, val_ind in kf.split(x, y):
        x_train, y_train = x[train_ind], y[train_ind]
        x_val, y_val = x[val_ind], y[val_ind]

        x_train_scale = scaler.fit_transform(x_train)
        x_val_scale = scaler.transform(x_val)

        #logistic regression
        log_reg_train = LogisticRegression(class_weight='balanced',
                                           solver='newton-cg',
                                           fit_intercept=True).fit(
                                               x_train_scale, y_train)
        log_reg_train_y_pred = log_reg_train.predict(x_val_scale)
        log_res_train_f1.append(
            f1_score(y_val, log_reg_train_y_pred, average='binary'))

        #loss=log
        sgd = linear_model.SGDClassifier(max_iter=1000,
                                         tol=1e-3,
                                         loss='log',
                                         class_weight='balanced').fit(
                                             x_train_scale, y_train)
        sgd_y_pred = sgd.predict(x_val_scale)
        log_res_sgd_train_f1.append(
            f1_score(y_val, sgd_y_pred, average='binary'))

        #modified huber
        sgd_huber = linear_model.SGDClassifier(max_iter=1000,
                                               tol=1e-3,
                                               alpha=20,
                                               loss='modified_huber',
                                               class_weight='balanced').fit(
                                                   x_train_scale, y_train)

        sgd_huber_y_pred = sgd_huber.predict(x_val_scale)
        mod_huber_train_f1.append(
            f1_score(y_val, sgd_huber_y_pred, average='binary'))

    print('Done with training data. Starting on testing data...\n')

    #gather all test tweets and apply the clean_data() and get_bigram() functions
    print('Cleaning testing data...')
    for row in test_data['tweets']:
        cleaned_status = clean_status(row)
        test_data_list.append(cleaned_status)
    bigrams = get_bigram(test_data_list)
    test_bigram = [bigrams[entry] for entry in test_data_list]
    test_corpus = [train_id2word.doc2bow(tweets) for tweets in test_bigram]

    #test model on testing data
    print('Starting classification algorithm calculations on testing data...')
    for i in range(len(test_data)):
        top_topics = model.get_document_topics(test_corpus[i],
                                               minimum_probability=0.0)
        topic_vector = [top_topics[i][1] for i in range(10)]
        test_vectors.append(topic_vector)

    x_test = np.array(test_vectors)
    y_test = np.array(test_data.relevant)
    x_fit = scaler.fit_transform(x_test)

    #logistic regression
    log_reg_test = LogisticRegression(class_weight='balanced',
                                      solver='newton-cg',
                                      fit_intercept=True).fit(x_fit, y_test)
    y_pred_log_res_test = log_reg_test.predict(x_fit)

    #modified huber
    sgd_huber_test = linear_model.SGDClassifier(max_iter=1000,
                                                tol=1e-3,
                                                alpha=20,
                                                loss='modified_huber',
                                                class_weight='balanced',
                                                shuffle=True).fit(
                                                    x_fit, y_test)
    y_pred_huber_test = sgd_huber_test.predict(x_fit)

    #print results for both cases
    print('Calculating Summary...')
    y_target = y_test
    y_model1 = y_pred_log_res_test
    y_model2 = y_pred_huber_test

    m_table = mcnemar_table(y_target=y_test,
                            y_model1=y_model1,
                            y_model2=y_model2)

    chi2, p = mcnemar(ary=m_table, corrected=True)

    print('\n')
    print('Results from using training data distribution:')
    print(
        f'Logistic Regression Val f1: {np.mean(log_res_train_f1):.3f} +- {np.std(log_res_train_f1):.3f}'
    )
    print(
        f'Logisitic Regression SGD Val f1: {np.mean(log_res_sgd_train_f1):.3f} +- {np.std(log_res_sgd_train_f1):.3f}'
    )
    print(
        f'SVM Huber Val f1: {np.mean(mod_huber_train_f1):.3f} +- {np.std(mod_huber_train_f1):.3f}'
    )

    print('\n')
    print('Results from using unseen test data:')
    print('Logistic regression Val f1: ' +
          str(f1_score(y_test, y_pred_log_res_test, average='binary')))
    print('SGD modified huber f1: ' +
          str(f1_score(y_test, y_pred_huber_test, average='binary')))

    print('\n')
    print('Summary: ')
    print('mcnemar table: ', m_table)
    print('chi-squared: ', chi2)
    print('p-value: ', p)

    #Save feature vector and huber classifier for later use
    print('\n')
    print('Saving feature vector...')
    save_vector = open('saved_pickles_models/feature_vector.pkl', 'wb')
    pickle.dump(feature_vectors, save_vector)
    save_vector.close()

    print('\n')
    print('Saving the huber classifier...')
    save_huber = open('saved_pickles_models/huber_classifier.pkl', 'wb')
    pickle.dump(sgd_huber, save_huber)
    save_huber.close()
    print('done')
def main(
    mlflow_server: str,
    significance: float,
):
    # We start by setting the tracking uri to make sure the mlflow server is reachable
    mlflow.set_tracking_uri(mlflow_server)
    # We need to instantiate the MlflowClient class for certain operations
    mlflow_client = MlflowClient()
    # We create and set an experiment to group all runs
    mlflow.set_experiment("Model Comparison")

    # We create classification data and split it into training and testing sets
    X, y = make_classification(
        n_samples=10000,
        n_classes=2,
        n_features=20,
        n_informative=9,
        random_state=random_seed,
    )
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.8,
                                                        test_size=0.2)

    # We first train a Logistic regression model, log it in mlflow and then move it to the production stage
    with mlflow.start_run():
        lr_model = LogisticRegression()
        lr_model.fit(X_train, y_train)
        y_pred = lr_model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.sklearn.log_model(lr_model,
                                 artifact_path="model",
                                 registered_model_name="Logistic Regression")
    mlflow_client.transition_model_version_stage(name="Logistic Regression",
                                                 version=1,
                                                 stage="Production")

    # We then train a Random Forest model, log it in mlflow and then move it to the staging stage
    with mlflow.start_run():
        rf_model = RandomForestClassifier()
        rf_model.fit(X_train, y_train)
        y_pred = rf_model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.sklearn.log_model(rf_model,
                                 artifact_path="model",
                                 registered_model_name="Random Forest")
    mlflow_client.transition_model_version_stage(name="Random Forest",
                                                 version=1,
                                                 stage="Staging")

    del lr_model
    del rf_model

    # We finally load both models from MLFlow
    # and compare them using the McNemar test
    # We get the download uris of both models and then we load them
    lr_model_download_uri = mlflow_client.get_model_version_download_uri(
        name="Logistic Regression",
        version=1,
    )
    rf_model_download_uri = mlflow_client.get_model_version_download_uri(
        name="Random Forest",
        version=1,
    )
    lr_model = mlflow.sklearn.load_model(lr_model_download_uri)
    rf_model = mlflow.sklearn.load_model(rf_model_download_uri)

    y_pred_lr = lr_model.predict(X_test)
    y_pred_rf = rf_model.predict(X_test)

    contingency_table = mcnemar_table(y_test, y_pred_lr, y_pred_rf)
    _, p_value = mcnemar(contingency_table, corrected=True)

    if p_value < significance:
        # In this case we reject the null hypothesis that the two models' predictive performance is the same
        # We then archive the logistic regression model
        # and move the random forest model to the Production stage
        print(
            f"p-value {p_value} smaller than significance level {significance}"
        )
        accuracy_lr = accuracy_score(y_test, y_pred_lr)
        accuracy_rf = accuracy_score(y_test, y_pred_rf)
        if accuracy_lr < accuracy_rf:
            print(
                f"Random Forest model's accuracy, {accuracy_rf}, is greater than "
                f"the Logistic Regression model's accuracy, {accuracy_lr}")
            print(
                "Archiving logistic regression model and moving random forest model to production"
            )
            mlflow_client.transition_model_version_stage(
                name="Logistic Regression",
                version=1,
                stage="Archived",
            )
            mlflow_client.transition_model_version_stage(
                name="Random Forest",
                version=1,
                stage="Production",
            )
        else:
            print(
                f"Random Forest model's accuracy, {accuracy_rf}, is less than or equal to "
                f"the Logistic Regression model's accuracy, {accuracy_lr}")
            print("Keeping logistic regression model in production")
    else:
        print(
            f"p-value {p_value} greater than significance level {significance}"
        )
        print("Keeping logistic regression model in production")
Example #27
        i = 1
        counter_1 += 1
        condel_binary.append(i)
        # print(1)
    elif i < 0.522:
        i = 0
        counter_0 += 1
        condel_binary.append(i)
        # print(0)
print('matthews_corr_coef (condel): ',
      matthews_corrcoef(true_class_binary, condel_binary))
#################################################################
'''SIFT'''

sift_and_model = mcnemar_table(y_target=np.array(true_class_binary),
                               y_model1=np.array(model_binary),
                               y_model2=np.array(sift_binary))
print('model & sift: ', '\n', sift_and_model)
chi2, p = mcnemar(ary=sift_and_model, corrected=True)
print(' chi_squared: ', chi2)
print(' p-value: ', p)

brd = checkerboard_plot(sift_and_model,
                        figsize=(2, 2),
                        fmt='%d',
                        col_labels=['model 2 wrong', 'model 2 right'],
                        row_labels=['model 1 wrong', 'model 1 right'])
plt.show()
'''PPH2'''

pph2_and_model = mcnemar_table(y_target=np.array(true_class_binary),
Example #28
    l_pval.append("%0.5f" % result[1])

    result = ttest_rel(df_result['bow_uni_R'].to_numpy(),
                       df_result['tfidf_uni_R'].to_numpy())
    print("Paired Test R: %0.5f, %0.5f" % result)
    l_stat.append("%0.5f" % result[0])
    l_pval.append("%0.5f" % result[1])

    result = ttest_rel(df_result['bow_uni_F1'].to_numpy(),
                       df_result['tfidf_uni_F1'].to_numpy())
    print("Paired Test F1: %0.5f, %0.5f" % result)
    l_stat.append("%0.5f" % result[0])
    l_pval.append("%0.5f" % result[1])

    # McNemar Test
    y_bow_uni = sr_bow_uni.to_numpy()
    y_tfidf_uni = sr_tfidf_uni.to_numpy()
    tb = mcnemar_table(y_target=y, y_model1=y_bow_uni, y_model2=y_tfidf_uni)
    chi2, p = mcnemar(ary=tb, corrected=True)
    print(tb)
    print("Mcnemar: chi2: %0.5f %0.5f" % (chi2, p))

#    print("T_STAT: ", ' '.join(l_stat))
#    print("P_VAL: ", ' '.join(l_pval))

#    df_out = pd.DataFrame()
#    df_out['Y_TRUE'] = df['label']
#    df_out['Y_BOW_UNI'] = sr_bow_uni
#    df_out['Y_TFIDF_UNI'] = sr_tfidf_uni
#    df_out.to_excel("RESULT.xlsx")
Example #29
                if len(g[1].strip()) == 1:
                    y_true.append(0)
                else:
                    y_true.append(1)

# Print evaluation report, confusion matrix and f2 scores
print("Evaluation full featured model:")
print(classification_report(y_true, y_pred2, labels=[1, 0]))
print("Confusion matrix:")
print(confusion_matrix(y_true, y_pred2, labels=[1, 0]))

print("MCC", matthews_corrcoef(y_true, y_pred2))
print("F2 - None", fbeta_score(y_true, y_pred2, average=None, beta=2))
print("F2 - weighted", fbeta_score(y_true, y_pred2, average='weighted',
                                   beta=2))
print("F2 - micro", fbeta_score(y_true, y_pred2, average='micro', beta=2))
print("F2 - macro", fbeta_score(y_true, y_pred2, average='macro', beta=2))

# McNemar test

y_true = np.array(y_true)
y_pred = np.array(y_pred)
y_pred2 = np.array(y_pred2)

tb = mcnemar_table(y_target=y_true, y_model1=y_pred, y_model2=y_pred2)
print("McNemar contigency table")
print(tb)

chi2, p = mcnemar(ary=tb, corrected=True)
print('chi-squared:', chi2)
print('p-value:', p)