def eval_nested_kfold(model, name, loc='./data/', k=10, seed=1234, use_nb=False):
    """
    Evaluate features with nested K-fold cross validation.

    Outer loop: held-out evaluation.
    Inner loop: hyperparameter tuning (scan over the logistic-regression C).

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'.

    Returns the list of outer-fold accuracies.

    Fixed: Python-2 print statements (`print scanscores` etc.) were syntax
    errors under Python 3; converted to print() calls with identical output.
    """
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(model, name, loc=loc, seed=seed)

    scan = [2 ** t for t in range(0, 9, 1)]  # candidate C values: 1 .. 256
    npts = len(z['text'])
    # NOTE(review): old sklearn.cross_validation.KFold signature — confirm the
    # installed scikit-learn version still provides it
    kf = KFold(npts, n_folds=k, random_state=seed)
    scores = []
    for train, test in kf:
        # Split data for this outer fold
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        Xraw = [z['text'][i] for i in train]
        Xraw_test = [z['text'][i] for i in test]

        scanscores = []
        for s in scan:
            # Inner KFold over the outer training split only
            innerkf = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf:
                # Split data for this inner fold
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]
                Xraw_innertrain = [Xraw[i] for i in innertrain]
                Xraw_innertest = [Xraw[i] for i in innertest]

                # Append Naive Bayes features (if applicable)
                if use_nb:
                    NBtrain, NBtest = compute_nb(Xraw_innertrain, y_innertrain, Xraw_innertest)
                    X_innertrain = hstack((X_innertrain, NBtrain))
                    X_innertest = hstack((X_innertest, NBtest))

                # Train classifier on the inner training split
                clf = LogisticRegression(C=s)
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)
                # double parentheses keep the original py2 tuple output
                print((s, acc))

            # Mean inner-fold accuracy for this C
            scanscores.append(np.mean(innerscores))

        # Pick the C with the best mean inner score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        print(scanscores)
        print(s)

        # NB features for the outer split (if applicable)
        if use_nb:
            NBtrain, NBtest = compute_nb(Xraw, y_train, Xraw_test)
            X_train = hstack((X_train, NBtrain))
            X_test = hstack((X_test, NBtest))

        # Retrain on the full outer training split with the chosen C
        clf = LogisticRegression(C=s)
        clf.fit(X_train, y_train)

        # Evaluate on the held-out fold
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(scores)

    return scores
def eval_nested_kfold(model, name, loc='./data/', k=10, seed=1234, use_nb=False):
    """
    Nested K-fold evaluation of sentence features.

    The outer folds provide held-out accuracy estimates; within each outer
    training split an inner K-fold scan selects the logistic-regression
    regularisation strength C.

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'.
    """
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(model, name, loc=loc, seed=seed)

    scan = [2 ** t for t in range(0, 9, 1)]
    npts = len(z['text'])
    kf = KFold(npts, n_folds=k, random_state=seed)

    scores = []
    for train, test in kf:
        # Outer split: feature matrices, labels, and the raw text
        X_train, y_train = features[train], z['labels'][train]
        X_test, y_test = features[test], z['labels'][test]
        Xraw = [z['text'][idx] for idx in train]
        Xraw_test = [z['text'][idx] for idx in test]

        # Mean inner-CV accuracy for every candidate C
        scanscores = []
        for s in scan:
            innerkf = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf:
                X_a, y_a = X_train[innertrain], y_train[innertrain]
                X_b, y_b = X_train[innertest], y_train[innertest]
                raw_a = [Xraw[idx] for idx in innertrain]
                raw_b = [Xraw[idx] for idx in innertest]

                # Optionally augment with Naive Bayes features
                if use_nb:
                    nb_a, nb_b = compute_nb(raw_a, y_a, raw_b)
                    X_a = hstack((X_a, nb_a))
                    X_b = hstack((X_b, nb_b))

                # Fit and score on this inner fold
                inner_clf = LogisticRegression(C=s)
                inner_clf.fit(X_a, y_a)
                acc = inner_clf.score(X_b, y_b)
                innerscores.append(acc)
                print((s, acc))
            scanscores.append(np.mean(innerscores))

        # Best C according to the inner scan
        s = scan[np.argmax(scanscores)]
        print(scanscores)
        print(s)

        # Optionally augment the outer split with Naive Bayes features
        if use_nb:
            nb_train, nb_test = compute_nb(Xraw, y_train, Xraw_test)
            X_train = hstack((X_train, nb_train))
            X_test = hstack((X_test, nb_test))

        # Retrain with the chosen C and score the held-out fold
        clf = LogisticRegression(C=s)
        clf.fit(X_train, y_train)
        scores.append(clf.score(X_test, y_test))
        print(scores)

    return scores
def eval_nested_kfold(encoder, name, loc='./data/', k=10, seed=1234, use_nb=False):
    """
    Evaluate features with nested K-fold cross validation.

    Outer loop: held-out evaluation.
    Inner loop: hyperparameter tuning (scan over the logistic-regression C).

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'.

    Returns the list of outer-fold (test-set) accuracies.

    Fixed: the function mixed print() calls with Python-2 print statements
    (`print scanscores`, `print s`, `print scores`), which are syntax errors
    under Python 3; all prints converted to calls with the original output.
    """
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(encoder, name, loc=loc, seed=seed)

    scan = [2 ** t for t in range(0, 9, 1)]  # candidate C values: 1 .. 256
    npts = len(z['text'])
    kf = KFold(npts, n_folds=k, random_state=seed)
    scores = []
    for train, test in kf:
        # Split data for this outer fold
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        Xraw = [z['text'][i] for i in train]
        Xraw_test = [z['text'][i] for i in test]

        scanscores = []
        for s in scan:
            # Inner KFold over the outer training split only
            innerkf = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf:
                # Split data for this inner fold
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]
                Xraw_innertrain = [Xraw[i] for i in innertrain]
                Xraw_innertest = [Xraw[i] for i in innertest]

                # Append Naive Bayes features (if applicable)
                if use_nb:
                    NBtrain, NBtest = compute_nb(Xraw_innertrain, y_innertrain, Xraw_innertest)
                    X_innertrain = hstack((X_innertrain, NBtrain))
                    X_innertest = hstack((X_innertest, NBtest))

                # NOTE(review): penalty='l1' needs a liblinear/saga solver in
                # scikit-learn >= 0.22 (default lbfgs rejects it) — confirm
                # the pinned sklearn version
                clf = LogisticRegression(C=s, penalty='l1')
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)  # keep the accuracies from each fold
                # (reg coeff, accuracy) tuple for this inner fold
                print((s, acc))

            # mean accuracy across the inner folds for this reg coefficient
            scanscores.append(np.mean(innerscores))

        # Get the index of the best score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        # mean score (from k folds of the training set) for each reg coefficient
        print(scanscores)
        # optimal regularization coefficient for this train/test split
        print(s)

        # NB features for the outer split (if applicable)
        if use_nb:
            NBtrain, NBtest = compute_nb(Xraw, y_train, Xraw_test)
            X_train = hstack((X_train, NBtrain))
            X_test = hstack((X_test, NBtest))

        # Train a classifier with the chosen reg coeff on the outer split
        clf = LogisticRegression(C=s, penalty='l1')
        clf.fit(X_train, y_train)

        # Evaluate on the held-out fold
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(scores)  # test set scores so far

    print('highest accuracy: ', max(scores))
    return scores
# NOTE(review): fragment — the statements below begin mid-loop inside a
# function defined outside this view (they reference a loop index `i` and
# the `X_flatten` / `predicted` / `bit_masks` names created earlier,
# presumably inside `predict`); indentation reconstructed from a collapsed
# source line — confirm against the original file.
mask_flatten = bit_masks[i].flatten()  # 1-D view of the i-th bit mask
for j in range(len(mask_flatten)):
    # zero out the prediction wherever the mask value is at most 0.2
    if mask_flatten[j] <= 0.2:
        X_flatten[j] = 0
predicted.append(X_flatten.reshape(256,256))
# NOTE(review): return value discarded — this call has no effect as written
np.asarray(predicted)

# plot each original and reconstructed image (and save them to disk)
for i in range(len(X_test)):
    plt.subplot(3,1,1)
    plt.imshow(X_test[i].reshape(256,256),cmap="gray")
    plt.imsave(arr=X_test[i].reshape(256,256),fname="/Users/jingyue/Desktop/test_image/origin_{}".format(i),cmap="gray")
    plt.subplot(3,1,2)
    plt.imshow(predicted[i].reshape(256,256),cmap="gray")
    plt.imsave(arr=predicted[i].reshape(256,256),fname="/Users/jingyue/Desktop/test_image/stripped_{}".format(i),cmap="gray")
    plt.subplot(3,1,3)
    plt.imshow(bit_masks[i].reshape(256,256),cmap="gray")
    plt.show()


if __name__ == '__main__':
    # Load the MRI dataset and run prediction only (training call left
    # commented out by the original author)
    X_train,y_train,X_test,y_test = load_data(30,"/Users/jingyue/Desktop/MRI_Dataset")
    # train(X_train,y_train,num_epochs=NUM_OF_EPOCHS, batch_size=BATCH_SIZE, plot = True)
    predict(X_test)
def evaluate(encoder, loc='./', k=10):
    """
    Evaluate encoder features with nested K-fold cross validation.

    Outer folds give held-out accuracies; an inner K-fold scan over the
    logistic-regression C parameter tunes the classifier per outer fold.

    Returns the list of outer-fold accuracies.

    Fixed: `print(ac)` raised NameError (the variable is `acc`);
    `random_state` on an unshuffled KFold is rejected by modern scikit-learn,
    so shuffle=True is set to honour the seed; dead locals (`npts`, the
    unused `Xraw*` lists) removed.
    """
    seed = 1234
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(encoder, loc=loc, seed=seed)

    scan = [2 ** t for t in range(-4, 9, 1)]  # candidate C values: 1/16 .. 256
    # shuffle=True is required for random_state to take effect (scikit-learn
    # raises if random_state is set while shuffle is False)
    kf = KFold(n_splits=k, shuffle=True, random_state=seed)

    scores = []
    for train, test in kf.split(features):
        # Split data for this outer fold
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        scanscores = []
        for s in scan:
            # Inner KFold over the outer training split only
            innerkf = KFold(n_splits=k, shuffle=True, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf.split(X_train):
                # Split data for this inner fold
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]

                # Train classifier on the inner training split
                clf = LogisticRegression(C=s)
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)
                print(s, acc)

            # Mean inner-fold accuracy for this C
            scanscores.append(np.mean(innerscores))

        # Pick the C with the best mean inner score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        print(scanscores)

        # Retrain with the chosen C and evaluate on the held-out fold
        clf = LogisticRegression(C=s)
        clf.fit(X_train, y_train)
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(acc)  # fixed: was print(ac) — NameError
        print("-------------------------------------------------------")

    return scores
def perform_exp(cnn_model, word_to_index, experiments):
    '''
    Perform the listed classification experiments.
    Modelled off of skip-thought.
    '''
    for exp in experiments:
        print('--------------------------------------------')

        # TREC has its own dedicated evaluation path
        if exp == 'TREC':
            perform_trec_exp(cnn_model['sess'], cnn_model['model_output'],
                             cnn_model['placeholders'][0],
                             cnn_model['placeholders'][2],
                             cnn_model['placeholders'][1], word_to_index)
            continue

        # Load the dataset and extract features
        z, features = dataset_handler.load_data(cnn_model, word_to_index, exp)

        c_grid = [2 ** t for t in range(0, 9, 1)]
        outer_cv = KFold(n_splits=10, random_state=1234)
        fold_accs = []
        for train_idx, test_idx in outer_cv.split(features):
            # Outer train/test split
            X_train, y_train = features[train_idx], z['labels'][train_idx]
            X_test, y_test = features[test_idx], z['labels'][test_idx]

            # Mean inner-CV accuracy for each candidate C
            mean_scores = []
            for c_val in c_grid:
                inner_cv = KFold(n_splits=10, random_state=1234 + 1)
                inner_accs = []
                for fit_idx, val_idx in inner_cv.split(X_train):
                    clf = LogisticRegression(C=c_val)
                    clf.fit(X_train[fit_idx], y_train[fit_idx])
                    inner_accs.append(clf.score(X_train[val_idx], y_train[val_idx]))
                mean_scores.append(np.mean(inner_accs))

            # Best C according to the inner scan
            best_c = c_grid[np.argmax(mean_scores)]
            print('Best value for C: {}'.format(best_c))

            # Retrain with the chosen C and score the held-out fold
            clf = LogisticRegression(C=best_c)
            clf.fit(X_train, y_train)
            fold_accs.append(clf.score(X_test, y_test))

        print('{} classification accuracy: {}'.format(
            exp, np.mean(fold_accs)))
from functools import lru_cache
import tensorflow.contrib as tfc
import os
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from dataset_handler import load_data
from plotting import plot

# TF1-style graph mode: clear any previously-built default graph
tf.reset_default_graph()
# Silence TensorFlow C++ info/warning log noise
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Datasets plus a feedable iterator handle and the (inputs, labels) tensors —
# presumably tf.data pipelines; verify against dataset_handler.load_data
train, test, handle, inputs, labels = load_data()


def make_handle(sess, dataset):
    """Return a string handle for *dataset*, initializing its iterator."""
    # To enumerate through the dataset, it will return an uninitialized iterator
    iterator = dataset.make_initializable_iterator()
    handle, _ = sess.run([iterator.string_handle(), iterator.initializer])
    return handle


# normalize the inputs and flatten them
inputs = tf.cast(inputs, tf.float32) / 255.0
inputs = tf.layers.flatten(inputs)
# cast labels to integers each representing a class
labels = tf.cast(labels, tf.int32)