def eval_nested_kfold(model, name, loc='./data/', k=10, seed=1234, use_nb=False):
    """
    Evaluate features with nested K-fold cross validation.

    Outer loop: Held-out evaluation
    Inner loop: Hyperparameter tuning (grid search over the regularization C)

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'

    Returns a list with one held-out accuracy per outer fold.
    """
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(model, name, loc=loc, seed=seed)

    # Candidate regularization strengths: 1, 2, 4, ..., 256
    scan = [2**t for t in range(0, 9, 1)]
    npts = len(z['text'])
    kf = KFold(npts, n_folds=k, random_state=seed)
    scores = []
    for train, test in kf:

        # Split data
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        Xraw = [z['text'][i] for i in train]
        Xraw_test = [z['text'][i] for i in test]

        scanscores = []
        for s in scan:

            # Inner KFold over the outer training split only
            innerkf = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf:

                # Split data
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]

                Xraw_innertrain = [Xraw[i] for i in innertrain]
                Xraw_innertest = [Xraw[i] for i in innertest]

                # NB (if applicable): append Naive Bayes features
                if use_nb:
                    NBtrain, NBtest = compute_nb(Xraw_innertrain, y_innertrain, Xraw_innertest)
                    X_innertrain = hstack((X_innertrain, NBtrain))
                    X_innertest = hstack((X_innertest, NBtest))

                # Train classifier
                clf = LogisticRegression(C=s)
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)
                # BUGFIX: was a Python 2 print statement; normalized to a
                # print() call like the rest of this code.
                print((s, acc))

            # Mean inner-fold accuracy for this value of C
            scanscores.append(np.mean(innerscores))

        # Pick the C with the best mean inner score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        print(scanscores)
        print(s)

        # NB (if applicable)
        if use_nb:
            NBtrain, NBtest = compute_nb(Xraw, y_train, Xraw_test)
            X_train = hstack((X_train, NBtrain))
            X_test = hstack((X_test, NBtest))

        # Retrain on the full outer training split with the tuned C
        clf = LogisticRegression(C=s)
        clf.fit(X_train, y_train)

        # Evaluate
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(scores)

    return scores
Esempio n. 2
0
def eval_nested_kfold(model, name, loc='./data/', k=10, seed=1234, use_nb=False):
    """
    Nested K-fold evaluation of sentence features.

    The outer folds give held-out accuracy estimates; within each outer
    training split an inner K-fold grid search picks the logistic regression
    C from powers of two.

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'
    """
    z, features = dataset_handler.load_data(model, name, loc=loc, seed=seed)

    c_grid = [2 ** t for t in range(0, 9, 1)]
    outer = KFold(len(z['text']), n_folds=k, random_state=seed)
    scores = []

    for train, test in outer:
        # Outer train/test split
        X_train, y_train = features[train], z['labels'][train]
        X_test, y_test = features[test], z['labels'][test]
        raw_train = [z['text'][i] for i in train]
        raw_test = [z['text'][i] for i in test]

        # Inner grid search: mean inner-fold accuracy per candidate C
        grid_means = []
        for c in c_grid:
            inner = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            fold_accs = []
            for itr, ite in inner:
                Xi_tr, yi_tr = X_train[itr], y_train[itr]
                Xi_te, yi_te = X_train[ite], y_train[ite]

                # Optionally append Naive Bayes features
                if use_nb:
                    nb_tr, nb_te = compute_nb([raw_train[i] for i in itr],
                                              yi_tr,
                                              [raw_train[i] for i in ite])
                    Xi_tr = hstack((Xi_tr, nb_tr))
                    Xi_te = hstack((Xi_te, nb_te))

                inner_clf = LogisticRegression(C=c)
                inner_clf.fit(Xi_tr, yi_tr)
                fold_acc = inner_clf.score(Xi_te, yi_te)
                fold_accs.append(fold_acc)
                print((c, fold_acc))

            grid_means.append(np.mean(fold_accs))

        # Select the best-performing C for this outer split
        best_c = c_grid[np.argmax(grid_means)]
        print(grid_means)
        print(best_c)

        # Optionally append Naive Bayes features for the final fit
        if use_nb:
            nb_tr, nb_te = compute_nb(raw_train, y_train, raw_test)
            X_train = hstack((X_train, nb_tr))
            X_test = hstack((X_test, nb_te))

        # Retrain on the full outer training split and score held-out data
        final_clf = LogisticRegression(C=best_c)
        final_clf.fit(X_train, y_train)
        scores.append(final_clf.score(X_test, y_test))
        print(scores)

    return scores
Esempio n. 3
0
def eval_nested_kfold(encoder,
                      name,
                      loc='./data/',
                      k=10,
                      seed=1234,
                      use_nb=False):
    """
    Evaluate features with nested K-fold cross validation.

    Outer loop: Held-out evaluation
    Inner loop: Hyperparameter tuning (grid search over the L1-penalized C)

    Datasets can be found at http://nlp.stanford.edu/~sidaw/home/projects:nbsvm
    Options for name are 'MR', 'CR', 'SUBJ' and 'MPQA'

    Returns the list of held-out accuracies, one per outer fold.
    """
    # Load the dataset and extract features
    z, features = dataset_handler.load_data(encoder, name, loc=loc, seed=seed)

    # Candidate regularization strengths: 1, 2, 4, ..., 256
    scan = [2**t for t in range(0, 9, 1)]
    npts = len(z['text'])
    kf = KFold(npts, n_folds=k, random_state=seed)
    scores = []
    for train, test in kf:

        # Split data
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        Xraw = [z['text'][i] for i in train]
        Xraw_test = [z['text'][i] for i in test]

        scanscores = []
        for s in scan:

            # Inner KFold over the outer training split only
            innerkf = KFold(len(X_train), n_folds=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf:

                # Split data
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]

                Xraw_innertrain = [Xraw[i] for i in innertrain]
                Xraw_innertest = [Xraw[i] for i in innertest]

                # NB (if applicable): append Naive Bayes features
                if use_nb:
                    NBtrain, NBtest = compute_nb(Xraw_innertrain, y_innertrain,
                                                 Xraw_innertest)
                    X_innertrain = hstack((X_innertrain, NBtrain))
                    X_innertest = hstack((X_innertest, NBtest))

                # Train classifier
                clf = LogisticRegression(C=s, penalty='l1')
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)  # keep the accuracies from each fold
                # reg coeff and accuracy for this inner fold: (reg, acc on fold)
                print(s, acc)

            # Mean accuracy across the inner folds for this reg coefficient
            scanscores.append(np.mean(innerscores))

        # Get the index of the best score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        # BUGFIX: the two lines below were Python 2 print statements, a
        # SyntaxError under Python 3 and inconsistent with the print() calls
        # used elsewhere in this function.
        # Mean score (from the inner folds) for each reg coefficient:
        print(scanscores)
        # Optimal regularization coefficient for this train/test split:
        print(s)

        # NB (if applicable)
        if use_nb:
            NBtrain, NBtest = compute_nb(Xraw, y_train, Xraw_test)
            X_train = hstack((X_train, NBtrain))
            X_test = hstack((X_test, NBtest))

        # Train a classifier with this reg coeff on the outer train/test split
        clf = LogisticRegression(C=s, penalty='l1')
        clf.fit(X_train, y_train)

        # Evaluate
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(scores)  # test set scores so far
    print('highest accuracy: ', max(scores))
    return scores
Esempio n. 4
0
            mask_flatten = bit_masks[i].flatten()
            
            for j in range(len(mask_flatten)):
                
                if mask_flatten[j] <= 0.2:
                    X_flatten[j] = 0
            
            predicted.append(X_flatten.reshape(256,256))

        np.asarray(predicted)

        # plot each original and reconstructed image
        for i in range(len(X_test)):
            plt.subplot(3,1,1)
            plt.imshow(X_test[i].reshape(256,256),cmap="gray")
            plt.imsave(arr=X_test[i].reshape(256,256),fname="/Users/jingyue/Desktop/test_image/origin_{}".format(i),cmap="gray")
            
            plt.subplot(3,1,2)
            plt.imshow(predicted[i].reshape(256,256),cmap="gray")
            plt.imsave(arr=predicted[i].reshape(256,256),fname="/Users/jingyue/Desktop/test_image/stripped_{}".format(i),cmap="gray")

            plt.subplot(3,1,3)
            plt.imshow(bit_masks[i].reshape(256,256),cmap="gray")
            
            plt.show()
            
if __name__ == '__main__':
    
    # Load train/test splits from the local MRI dataset directory.
    # NOTE(review): the first argument (30) presumably controls how much data
    # is loaded — confirm against load_data's signature.
    X_train,y_train,X_test,y_test = load_data(30,"/Users/jingyue/Desktop/MRI_Dataset") 
    # train(X_train,y_train,num_epochs=NUM_OF_EPOCHS, batch_size=BATCH_SIZE, plot = True)
    # Run inference only; training is left commented out above.
    predict(X_test)
Esempio n. 5
0
def evaluate(encoder, loc='./', k=10):
    """
    Evaluate encoder features with nested K-fold cross validation.

    Outer loop: held-out accuracy estimates.
    Inner loop: grid search over the logistic regression C (powers of two
    from 2**-4 to 2**8) on each outer training split.

    Returns the list of held-out accuracies, one per outer fold.
    """
    seed = 1234
    z, features = dataset_handler.load_data(encoder, loc=loc, seed=seed)

    # Candidate regularization strengths: 2**-4 ... 2**8
    scan = [2**t for t in range(-4, 9, 1)]
    # NOTE(review): random_state has no effect when shuffle is left False,
    # and recent scikit-learn raises a ValueError for this combination —
    # confirm whether shuffled folds were intended.
    kf = KFold(n_splits=k, random_state=seed)
    scores = []
    for train, test in kf.split(features):

        # Split data
        X_train = features[train]
        y_train = z['labels'][train]
        X_test = features[test]
        y_test = z['labels'][test]

        scanscores = []
        for s in scan:

            # Inner KFold over the outer training split only
            innerkf = KFold(n_splits=k, random_state=seed + 1)
            innerscores = []
            for innertrain, innertest in innerkf.split(X_train):

                # Split data
                X_innertrain = X_train[innertrain]
                y_innertrain = y_train[innertrain]
                X_innertest = X_train[innertest]
                y_innertest = y_train[innertest]

                # Train classifier
                clf = LogisticRegression(C=s)
                clf.fit(X_innertrain, y_innertrain)
                acc = clf.score(X_innertest, y_innertest)
                innerscores.append(acc)
                print(s, acc)

            # Append mean score
            scanscores.append(np.mean(innerscores))

        # Get the index of the best score
        s_ind = np.argmax(scanscores)
        s = scan[s_ind]
        print(scanscores)
        #print(s)

        # Train classifier with the tuned C on the full outer training split
        clf = LogisticRegression(C=s)
        clf.fit(X_train, y_train)

        # Evaluate
        acc = clf.score(X_test, y_test)
        scores.append(acc)
        print(acc)  # BUGFIX: was print(ac) — NameError at runtime
        print("-------------------------------------------------------")

    return scores
def perform_exp(cnn_model, word_to_index, experiments):
    '''
    Run each requested classification experiment. TREC is evaluated by its
    dedicated routine; every other dataset is scored with nested 10-fold
    cross validation, tuning the logistic regression C on the inner folds.
    Modelled off of skip-thought.
    '''

    for exp in experiments:
        print('--------------------------------------------')

        # TREC has its own evaluation path; handle it and move on.
        if exp == 'TREC':
            perform_trec_exp(cnn_model['sess'], cnn_model['model_output'],
                             cnn_model['placeholders'][0],
                             cnn_model['placeholders'][2],
                             cnn_model['placeholders'][1], word_to_index)
            continue

        # Load the dataset and extract features
        z, features = dataset_handler.load_data(cnn_model, word_to_index, exp)

        c_grid = [2 ** t for t in range(0, 9, 1)]
        outer_cv = KFold(n_splits=10, random_state=1234)
        outer_scores = []
        for tr, te in outer_cv.split(features):
            # Outer train/test split
            X_tr, y_tr = features[tr], z['labels'][tr]
            X_te, y_te = features[te], z['labels'][te]

            # Inner grid search: mean inner-fold accuracy per candidate C
            mean_accs = []
            for c in c_grid:
                inner_cv = KFold(n_splits=10, random_state=1234 + 1)
                accs = []
                for itr, ite in inner_cv.split(X_tr):
                    inner_clf = LogisticRegression(C=c)
                    inner_clf.fit(X_tr[itr], y_tr[itr])
                    accs.append(inner_clf.score(X_tr[ite], y_tr[ite]))
                mean_accs.append(np.mean(accs))

            # Select the C with the best mean inner score
            best_c = c_grid[np.argmax(mean_accs)]
            print('Best value for C: {}'.format(best_c))

            # Retrain with the tuned C and score the held-out fold
            clf = LogisticRegression(C=best_c)
            clf.fit(X_tr, y_tr)
            outer_scores.append(clf.score(X_te, y_te))

        print('{} classification accuracy: {}'.format(
            exp, np.mean(outer_scores)))
Esempio n. 7
0
from functools import lru_cache
import tensorflow.contrib as tfc
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from dataset_handler import load_data
from plotting import plot

# Start from a fresh default graph and silence TensorFlow's C++ INFO/WARNING
# log output.
tf.reset_default_graph()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# NOTE(review): this load_data (from dataset_handler) takes no arguments and
# appears to return tf.data plumbing (train/test datasets, an iterator handle,
# and input/label tensors) — confirm against dataset_handler.
train, test, handle, inputs, labels = load_data()


def make_handle(sess, dataset):
    """Initialize an iterator over *dataset* and return its string handle.

    The handle can be fed to a feedable iterator placeholder to switch
    between datasets at run time.
    """
    it = dataset.make_initializable_iterator()
    # Fetch the handle and run the initializer in a single session call.
    string_handle, _ = sess.run([it.string_handle(), it.initializer])
    return string_handle


# normalize the inputs and flatten them
inputs = tf.cast(inputs, tf.float32) / 255.0  # scale pixel values to [0, 1]
inputs = tf.layers.flatten(inputs)  # collapse each example to a 1-D vector
# cast labels to integers each representing a class
labels = tf.cast(labels, tf.int32)