def test_mean_quadratic_weighted_kappa(self):
    """Check ``metrics.mean_quadratic_weighted_kappa`` against fixed values.

    Three inputs are exercised in turn; each result is compared to its
    expected value with ``assertAlmostEqual``.
    """
    # Two kappas of 1: the expected mean is 0.999, not exactly 1.
    all_ones = metrics.mean_quadratic_weighted_kappa([1, 1])
    self.assertAlmostEqual(all_ones, 0.999)

    # Second positional argument supplied -- presumably per-kappa weights;
    # confirm against the metrics module.
    with_second_arg = metrics.mean_quadratic_weighted_kappa(
        [0.5, 0.8], [1, .5]
    )
    self.assertAlmostEqual(with_second_arg, 0.624536446425734)

    # Opposite-signed kappas are expected to give a mean of 0.
    opposite_signs = metrics.mean_quadratic_weighted_kappa([-1, 1])
    self.assertAlmostEqual(opposite_signs, 0.0)
def test_mean_quadratic_weighted_kappa(self):
    """Verify ``metrics.mean_quadratic_weighted_kappa`` on three fixed cases.

    Each case is a tuple of positional arguments paired with the value the
    call is expected to (almost) equal. Cases run in order and the first
    mismatch fails the test.
    """
    cases = (
        # (positional arguments, expected mean kappa)
        (([1, 1],), 0.999),
        # The second list is presumably a set of weights -- confirm against
        # the metrics module.
        (([0.5, 0.8], [1, .5]), 0.624536446425734),
        (([-1, 1],), 0.0),
    )
    for call_args, expected in cases:
        kappa = metrics.mean_quadratic_weighted_kappa(*call_args)
        self.assertAlmostEqual(kappa, expected)
def asap_cv_cnn_multi():
    """Evaluate ``cnn_multi_selfembd`` on folds 1-10 and print the mean kappa.

    For every fold the train/test CSVs are loaded from ``data/asap2`` three
    times via ``load_csvs``: the main files, the ``pos`` files and the ``dp``
    files. The three inputs are passed to ``cnn_multi_selfembd`` and the
    kappa it returns is collected. After the last fold the collected kappas
    are combined with ``metrics.mean_quadratic_weighted_kappa`` and printed.
    Nothing is returned.
    """
    maxlen = 75
    nb_words = 4500
    embd_dim = 50
    nb_pos = 15

    fold_kappas = []
    for fold in range(1, 11):
        fold_id = str(fold)
        train = 'data/asap2/train' + fold_id + '.csv'
        test = 'data/asap2/test' + fold_id + '.csv'
        pos_ta = 'data/asap2/pos/train' + fold_id + '_pos.csv'
        pos_ts = 'data/asap2/pos/test' + fold_id + '_pos.csv'
        dp_ta = 'data/asap2/dp/train' + fold_id + '_dp.csv'
        dp_ts = 'data/asap2/dp/test' + fold_id + '_dp.csv'

        print(train + '=>' + test)

        X_train, Y_train, X_test, Y_test, nb_classes = load_csvs(
            train, test, nb_words, maxlen, embd_type='self', w2v=None)
        # Only the train/test inputs of the auxiliary loads are used; the
        # remaining three return values are discarded.
        pos_train, _, pos_test, _, _ = load_csvs(
            pos_ta, pos_ts, nb_pos, maxlen, embd_type='self', w2v=None)
        dp_train, _, dp_test, _, _ = load_csvs(
            dp_ta, dp_ts, nb_words, maxlen, embd_type='self', w2v=None)

        # The trailing positional literals are hyper-parameters whose meaning
        # is defined by cnn_multi_selfembd's signature (not visible here).
        fold_kappas.append(
            cnn_multi_selfembd(
                X_train, Y_train, X_test, Y_test, nb_classes,
                maxlen, nb_words, embd_dim,
                pos_train, pos_test, 10,
                dp_train, dp_test, 40,
                50, 32, 30, 'rmsprop',
            )
        )

    kappa_cv = metrics.mean_quadratic_weighted_kappa(fold_kappas)
    print('after 10-fold cv:' + str(kappa_cv))
def calc_mqwp(output):
    """Return the mean quadratic weighted kappa over all question sets.

    The rows of ``output`` are grouped by the ``'set'`` column. For each
    group a kappa is computed from its ``'output'`` and ``'target'`` columns
    (passed in that order). The per-set kappas are printed and then combined
    with ``mean_quadratic_weighted_kappa``.

    :param output: dataframe containing target, output, question set
    :return: mean quadratic weighted kappa
    """
    # Iterating a groupby yields (key, sub-frame) pairs; only the frame is
    # needed here.
    per_set_kappas = [
        quadratic_weighted_kappa(frame["output"], frame["target"])
        for _, frame in output.groupby('set')
    ]
    print('Kappa of each set: ', per_set_kappas)
    return mean_quadratic_weighted_kappa(per_set_kappas)
def asap_cv_cnnvar():
    """Evaluate ``cnn_var_selfembd`` on folds 1-10 and print the mean kappa.

    For every fold the train/test CSVs under ``data/asap2`` are loaded with
    ``load_csvs`` and handed to ``cnn_var_selfembd``, and the kappa it
    returns is collected. After the last fold the collected kappas are
    combined with ``metrics.mean_quadratic_weighted_kappa`` and printed.
    Nothing is returned.
    """
    maxlen = 75
    nb_words = 4500
    embd_dim = 50

    fold_kappas = []
    for fold in range(1, 11):
        fold_id = str(fold)
        train = 'data/asap2/train' + fold_id + '.csv'
        test = 'data/asap2/test' + fold_id + '.csv'

        print(train + '=>' + test)

        X_train, Y_train, X_test, Y_test, nb_classes = load_csvs(
            train, test, nb_words, maxlen, embd_type='self', w2v=None)

        # The trailing positional literals are hyper-parameters whose meaning
        # is defined by cnn_var_selfembd's signature (not visible here).
        fold_kappas.append(
            cnn_var_selfembd(
                X_train, Y_train, X_test, Y_test, nb_classes,
                maxlen, nb_words, embd_dim,
                50, 32, 30, 'rmsprop',
            )
        )

    kappa_cv = metrics.mean_quadratic_weighted_kappa(fold_kappas)
    print('after 10-fold cv:' + str(kappa_cv))