def ted_cv_w2v():
    """Run 10-fold cross-validation over the TED CSV folds with 'w2v' embeddings.

    For each fold 1..10 this loads ``data/TED/train<fold>.csv`` and
    ``data/TED/test<fold>.csv`` through ``load_csvs``, hands the result to
    ``cnn1d_w2vembd`` and collects the value it returns (presumably an
    accuracy -- confirm against ``cnn1d_w2vembd``). The mean of the collected
    values is printed; nothing is returned.
    """
    # NOTE(review): unlike asap_cv_w2v / pun_cv_w2v, no preloaded ``w2v``
    # object is passed to load_csvs here -- confirm load_csvs handles that.
    maxlen = 20
    fold_scores = []
    for fold_id in range(1, 11):
        train_csv = 'data/TED/train' + str(fold_id) + '.csv'
        test_csv = 'data/TED/test' + str(fold_id) + '.csv'
        print('=>'.join([train_csv, test_csv]))

        x_tr, y_tr, x_te, y_te, n_classes = load_csvs(
            train_csv, test_csv, 0, maxlen, embd_type='w2v')

        # Trailing positional arguments are model/training settings whose
        # meaning is defined by cnn1d_w2vembd (not visible in this section).
        score = cnn1d_w2vembd(x_tr, y_tr, x_te, y_te, n_classes,
                              maxlen, 100, 5, 50, 20, 'rmsprop')
        fold_scores.append(score)

    print('after 10-fold cv:' + str(np.mean(fold_scores)))
def asap_cv_w2v():
    """Run 10-fold cross-validation over the asap2 CSV folds with 'w2v' embeddings.

    The word vectors are loaded once from ``data/Google_w2v.bin`` and shared
    by every fold. For each fold 1..10 the train/test CSV pair under
    ``data/asap2/`` is loaded with ``load_csvs`` and passed to
    ``cnn1d_w2vembd``; the value it returns (presumably a kappa score --
    confirm against ``cnn1d_w2vembd``) is collected. The mean over all folds
    is printed; nothing is returned.
    """
    maxlen = 40

    # Load the embeddings a single time, before iterating over the folds.
    word_vectors = load_w2v('data/Google_w2v.bin')
    print("loaded Google word2vec")

    fold_scores = []
    for fold_id in range(1, 11):
        train_csv = 'data/asap2/train' + str(fold_id) + '.csv'
        test_csv = 'data/asap2/test' + str(fold_id) + '.csv'
        print('=>'.join([train_csv, test_csv]))

        x_tr, y_tr, x_te, y_te, n_classes = load_csvs(
            train_csv, test_csv, 0, maxlen,
            embd_type='w2v', w2v=word_vectors)

        # Trailing positional arguments are model/training settings whose
        # meaning is defined by cnn1d_w2vembd (not visible in this section).
        score = cnn1d_w2vembd(x_tr, y_tr, x_te, y_te, n_classes,
                              maxlen, 100, 3, 50, 20, 'rmsprop')
        fold_scores.append(score)

    print('after 10-fold cv:' + str(np.mean(fold_scores)))
def pun_cv_w2v():
    """Run 10-fold cross-validation over the pun_of_day CSV folds with 'w2v' embeddings.

    The word vectors are loaded once from ``data/Google_w2v.bin`` and shared
    by every fold. For each fold 1..10 the train/test CSV pair under
    ``data/pun_of_day/`` is loaded with ``load_csvs`` and passed to
    ``cnn1d_w2vembd``; the value it returns (presumably an accuracy --
    confirm against ``cnn1d_w2vembd``) is collected. The mean over all folds
    is printed; nothing is returned.
    """
    maxlen = 20

    # Load the embeddings a single time, before iterating over the folds.
    word_vectors = load_w2v('data/Google_w2v.bin')
    print("loaded Google word2vec")

    fold_scores = []
    for fold_id in range(1, 11):
        train_csv = 'data/pun_of_day/train' + str(fold_id) + '.csv'
        test_csv = 'data/pun_of_day/test' + str(fold_id) + '.csv'
        print('=>'.join([train_csv, test_csv]))

        x_tr, y_tr, x_te, y_te, n_classes = load_csvs(
            train_csv, test_csv, 0, maxlen,
            embd_type='w2v', w2v=word_vectors)

        # Trailing positional arguments are model/training settings whose
        # meaning is defined by cnn1d_w2vembd (not visible in this section).
        score = cnn1d_w2vembd(x_tr, y_tr, x_te, y_te, n_classes,
                              maxlen, 100, 5, 50, 20, 'rmsprop')
        fold_scores.append(score)

    print('after 10-fold cv:' + str(np.mean(fold_scores)))
def test_ted_w2v():
    """Load the TED data via ``load_ted`` in 'w2v' mode and run ``cnn1d_w2vembd`` once.

    The value returned by ``cnn1d_w2vembd`` is discarded and this function
    returns nothing.
    """
    maxlen = 20
    x_tr, y_tr, x_te, y_te, n_classes = load_ted(0, maxlen, 'w2v')

    # Trailing positional arguments are model/training settings whose
    # meaning is defined by cnn1d_w2vembd (not visible in this section).
    cnn1d_w2vembd(x_tr, y_tr, x_te, y_te, n_classes,
                  maxlen, 100, 5, 50, 20, 'rmsprop')
def test_sg15_w2v():
    """Load the sg15 data via ``load_sg15`` in 'w2v' mode and run ``cnn1d_w2vembd`` once.

    The value returned by ``cnn1d_w2vembd`` is discarded and this function
    returns nothing.
    """
    maxlen = 120
    x_tr, y_tr, x_te, y_te, n_classes = load_sg15(0, maxlen, 'w2v')

    # Trailing positional arguments are model/training settings whose
    # meaning is defined by cnn1d_w2vembd (not visible in this section).
    cnn1d_w2vembd(x_tr, y_tr, x_te, y_te, n_classes,
                  maxlen, 100, 10, 64, 20, 'rmsprop')