Python plotCost Examples

Programming Language: Python

Namespace/Package Name: utilities.visualize

Method/Function: plotCost

Examples at hotexamples.com: 10

Python plotCost - 10 examples found. These are the top-rated real-world Python examples of utilities.visualize.plotCost extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: tensorflow_SF.py Project: dlacombejr/sparse_filtering

def main():
    """Train a sparse filtering network with TensorFlow and plot the results.

    Loads the 'X' matrix from ``data/patches.mat`` (relative to this file),
    subtracts the mean over axis 0, transposes it and casts it to float32.
    A ``SparseFilter`` model with ``n_filters`` filters is then trained with
    plain gradient descent for ``iterations[0]`` epochs. Finally the cost
    history and the learned weights are handed to ``visualize`` for plotting.
    """

    # define global parameters
    filename = 'patches.mat'
    n_filters = 100
    learn_rate = 0.001
    iterations = [200]

    # load in data and preprocess
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    data -= data.mean(axis=0)
    data = np.float32(data.T)

    # construct the network
    print("building model...")
    weights = tf.Variable(tf.random_uniform([n_filters, data.shape[1]]))
    model = SparseFilter(weights, data)

    # define loss, optimizer, and train function
    loss = tf.reduce_sum(model.feed_forward())
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train = optimizer.minimize(loss)

    # initialize all the variables
    # NOTE(review): later TensorFlow 1.x releases deprecate this call in
    # favour of tf.global_variables_initializer(); kept as-is because the
    # TensorFlow version this script targets is not visible here.
    init = tf.initialize_all_variables()

    # run the session inside a context manager so that its resources are
    # released even if training raises
    with tf.Session() as sess:
        sess.run(init)

        # train the sparse filtering network
        print("training network...")
        t = time.time()
        cost_running = []

        # iterate over training epochs
        for epoch in range(iterations[0]):

            sess.run(train)
            # the cost is evaluated after the update step, as a separate run
            current_cost = sess.run(loss)
            cost_running.append(current_cost)
            print("Cost at epoch %i: %0.4f" % (epoch, current_cost))

        # calculate and display elapsed training time
        elapsed = time.time() - t
        print('Elapsed training time: %f' % elapsed)

        # fetch the trained weights while the session is still open
        weights_final = sess.run(weights)

    # plot the cost function over time
    c = {'layer0': cost_running}
    visualize.plotCost(c)

    # visualize the receptive fields of the first layer
    print(weights_final.shape)
    visualize.drawplots(weights_final.T, color='gray', convolution='n',
                        pad=0, examples=None, channels=1)

Example #2

Show file

                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # save model to dictionary
    models[model_type[0]] = model

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # plot the cost function over time
    visualize.plotCost(cost)

    # visualize the receptive fields of the first layer
    visualize.drawplots(weights['layer0'].T,
                        color='gray',
                        convolution=convolution,
                        pad=0,
                        examples=None,
                        channels=channels)

    # get activations of first layer and save in dictionary
    f_hat, _, _, _, _, _ = outputs[0](data)
    f_hats[model_type[0]] = f_hat

# project activations of both networks up using local connections
group_matrix = connections.gMatToroidal(n_filters,

Example #3

Show file

import numpy as np
from utilities.visualize import plotCost, drawplots
from scipy.io import loadmat
    

#################### MAIN SCRIPT #########################
    
# locate the patch file next to this script and read its 'X' matrix
filename = 'patches.mat'
basepath = os.path.dirname(__file__)
filepath = os.path.join(basepath, "data", filename)
data = loadmat(filepath)['X']

# subtract the mean over axis 0 in place, then cast to float32
data -= data.mean(axis=0)
data = np.float32(data)

# build the 'SF' network and compile its training functions on the data
wDims = [[100, 256]]
network_options = dict(model_type='SF', wDims=wDims, p=None, gMat=None,
                       gSize=None, step=None, lr=0.01)
model = sparse_filtering.network(**network_options)
train = model.training_functions(data)

# call the first training function maxIter times, recording every cost
maxIter = 100
cost_master = []
i = 0
while i < maxIter:
    cost, w = train[0]()
    cost_master.append(cost)
    message = "The cost at iteration %i: %f" % (i, cost)
    print(message)
    i += 1

# plot the recorded costs, then the transposed weights of the last step
plotCost(cost_master)
drawplots(w.T)

Example #4

Show file

File: test.py Project: fengjiran/sparse_filtering

def _build_arg_parser():
    """Create the command-line parser for the sparse filtering test script."""
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1

        In the convolutional case, the extra "1" is added automatically for broadcasting.
        -------------------------------------------------------------------------------------------------------------
        '''))
    parser.add_argument("-m",
                        "--model",
                        default=['SparseFilter'],
                        nargs='+',
                        help="the model type")
    parser.add_argument("-c",
                        "--convolution",
                        default="n",
                        help="convolution, yes or no")
    parser.add_argument("-f",
                        "--filename",
                        default="patches.mat",
                        help="the data filename")
    parser.add_argument(
        "-d",
        "--dimensions",
        type=int,
        nargs='+',
        default=([100, 256]),
        help=
        "the dimensions of the model: [neurons, input size] or [neurons, length, width]"
    )
    parser.add_argument("-p",
                        "--pool",
                        type=int,
                        nargs='+',
                        default=None,
                        help="pooling dimensions")
    parser.add_argument("-g",
                        "--group",
                        type=int,
                        default=None,
                        help="group size")
    parser.add_argument("-s",
                        "--step",
                        type=int,
                        default=None,
                        help="step size")
    parser.add_argument("-l",
                        "--learn_rate",
                        type=float,
                        default=.001,
                        help="learning rate")
    parser.add_argument("-i",
                        "--iterations",
                        type=int,
                        nargs='+',
                        default=[100],
                        help="number of iterations")
    parser.add_argument("-v",
                        "--verbosity",
                        type=int,
                        default=0,
                        help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o",
                        "--opt",
                        default="GD",
                        help="optimization method: GD or L-BFGS")
    parser.add_argument("-w",
                        "--whitening",
                        default='n',
                        help="whitening: 'y' or 'n'")
    parser.add_argument("-t",
                        "--test",
                        default='n',
                        help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a",
                        "--channels",
                        type=int,
                        default=1,
                        help="number of channels in data")
    parser.add_argument("-e",
                        "--examples",
                        type=int,
                        default=None,
                        help="number of training examples")
    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=1000,
                        help="number of examples in [mini]batch")
    parser.add_argument("-z",
                        "--aws",
                        default='n',
                        help="run on aws: 'y' or 'n'")
    return parser


def _upload_and_remove(key, path):
    """Upload the local file at `path` to S3 under the same key name, then delete the local copy."""
    key.key = path
    key.set_contents_from_filename(path)
    os.remove(path)


def main():
    """Train a sparse filtering network from the command line and save the results.

    Steps, in order:
      1. parse the command-line options (see ``_build_arg_parser``);
      2. load the 'X' matrix from ``data/<filename>`` and preprocess it
         according to the convolution / whitening / filename options;
      3. build the network and train every layer with gradient descent or
         L-BFGS;
      4. save the pickled model, the weights and a log file into a
         time-stamped sub-folder of ``./saved`` (uploading to S3 and removing
         the local files when ``--aws y``);
      5. with ``--verbosity >= 1`` save per-batch activations, with
         ``--verbosity 2`` also display figures;
      6. with ``--test y`` train a linear SVM per layer and log its accuracy.
    """
    # parse options from the command line
    args = _build_arg_parser().parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    # =================================== Load in the data ===================================

    # load in data
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print("pre-processing data ...")
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data)
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            # floor division keeps the Python 2 integer semantics explicit
            side = int(np.sqrt(data.shape[1] // 3))
            data = np.float32(data.reshape(-1, 3, side, side))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            # apply LCN to every channel separately, writing the result back
            for channel in range(args.channels):
                single_channel = data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3]))
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(single_channel, kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0, "number of examples must be a multiple of batch_size"

    # other assertions
    assert len(args.model) == len(args.iterations), \
        "one iteration count is required per model layer"
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None

    # ============================= Build and train the network =============================

    # construct the network
    print("building model...")
    model = sf.Network(model_type=args.model,
                       weight_dims=args.dimensions,
                       p=args.pool,
                       group_size=args.group,
                       step=args.step,
                       lr=args.learn_rate,
                       opt=args.opt,
                       c=args.convolution,
                       test=args.test,
                       batch_size=args.batch_size
                       )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in range(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in range(args.iterations[l]):

                # go though [mini]batches
                for batch_index in range(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" %
                          (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l],
                         model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B',
                         jac=True,
                         options={
                             'maxiter': args.iterations[l],
                             'disp': True
                         })

            # NOTE(review): the result is always reshaped with the dimensions
            # of layer 0, even when l > 0 -- confirm this is intended for
            # multi-layer networks.
            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # connect to S3 when running on aws; `k` is the bucket key used for uploads
    k = None
    if args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)

    # create sub-folder for saved model (named after the current local time)
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.mkdir(directory_name)

    # save the model for later use; HIGHEST_PROTOCOL is a binary format, so
    # the file must be opened in binary mode
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        _upload_and_remove(k, full_path)

    # save weights separately
    weights_path = directory_name + '/weights.mat'
    savemat(weights_path, weights)
    if args.aws == 'y':
        _upload_and_remove(k, weights_path)

    # create log file
    log_path = directory_name + "/log.txt"
    ex = data.shape[0]
    if args.examples is not None:
        ex = args.examples
    with open(log_path, "w") as log_file:
        for m, model_name in enumerate(args.model):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, model_name, args.dimensions[m], args.iterations[m]))
            # compare the layer's own model name (args.model is a list, so
            # comparing the whole list to a string could never match)
            if model_name == 'GroupSF' or model_name == 'GroupConvolutionalSF':
                # %s rather than %d: group/step are only asserted to be set
                # when the *first* layer is a group model
                log_file.write(" Groups: %s \n Step: %s" %
                               (args.group, args.step))

        log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                       (args.filename, ex, args.whitening))
        log_file.write('\nElapsed training time: %f' % elapsed)
    if args.aws == 'y':
        _upload_and_remove(k, log_path)

    # =============================== Verbosity Options =====================================

    # get variables and saves
    if args.verbosity >= 1:

        # activations are taken from the last layer of the network
        last_layer = model.n_layers - 1

        for batch in range(n_batches):

            # get variables of interest; the dictionaries are re-created for
            # every batch, so after the loop they only hold the last batch
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[last_layer](
                data[begin:end])

            tag = 'layer' + str(last_layer) + '_batch' + str(batch)
            activations_norm[tag] = f_hat
            activations_raw[tag] = f
            activations_shuffled[tag] = f_hat_shuffled
            # NOTE(review): `err` is stored under "reconstruction" and `rec`
            # under "error_recon" -- looks swapped; confirm against the order
            # of the values returned by the output function.
            reconstruction[tag] = err
            error_recon[tag] = rec
            pooled[tag] = p

            # save normalized and raw activations separately
            norm_path = directory_name + '/activations_norm_' + tag + '.mat'
            raw_path = directory_name + '/activation_raw_' + tag + '.mat'
            savemat(norm_path, activations_norm)
            savemat(raw_path, activations_raw)

            if args.aws == 'y':
                _upload_and_remove(k, norm_path)
                _upload_and_remove(k, raw_path)

    # display figures
    # NOTE(review): the lookups below use plain 'layerN' keys, while the
    # dictionaries filled above use 'layerN_batchM' keys, so they will raise
    # KeyError as written -- the intended aggregation over batches needs to
    # be decided before this section can work.
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T,
                            color='gray',
                            convolution=args.convolution,
                            pad=0,
                            examples=None,
                            channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in range(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(
                    args.dimensions[l][0],
                    data.shape[0] * activations_shuffled[layer].shape[2] *
                    activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(
                video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'],
                                color='gray',
                                convolution=args.convolution,
                                pad=1,
                                examples=100)

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]
            # floor division keeps the Python 2 integer result explicit
            half_dim = dim // 2

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'],
                                                dim=dim)

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(
                pooled['layer0'][0, :, :, :].reshape(
                    1, pooled['layer0'].shape[1], pooled['layer0'].shape[2],
                    pooled['layer0'].shape[3]),
                dim=half_dim)

            # visualize an example of a LCNed convolved image after max pooling
            temp = pooled['layer0']
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape(
                    (1, 1, half_dim, half_dim)),
                                                    kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=half_dim)

    # ================================ Test the Model =======================================

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            side = int(np.sqrt(test_data.shape[1]))
            test_data = np.float32(test_data.reshape(-1, 1, side, side))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(
                    test_data.shape[0], test_data.shape[1] *
                    test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(
                    data.shape[0],
                    data.shape[1] * data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0], test_input[0].shape[1] *
                    test_input[0].shape[2] * test_input[0].shape[3])

                # NOTE(review): requires --verbosity >= 1 (otherwise
                # activations_norm is undefined) and a plain 'layerN' key
                # that the saving loop above never creates.
                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0], train_input.shape[1] *
                    train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(
                train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" %
                  (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            with open(directory_name + "/log.txt", "a") as log_file:
                log_file.write(
                    "\nAccuracy of the classifier at layer %1d: %0.4f" %
                    (layer, accuracy))

    # visualize the confusion matrix (of the last layer evaluated above)
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()

Example #5

Show file

File: regression_analysis.py Project: dlacombejr/sparse_filtering

def main():
    """Relate saved neuron activations to class labels and analyse the coefficients.

    Steps, in order:
      1. load ``concatenated_activations.mat`` from a folder under ``saved``
         and the labels from ``data/CIFAR_data.mat``;
      2. fit one ridge regression per neuron and store the coefficients in
         ``coefficients.mat``;
      3. train a sparse filtering network on the coefficients, save its
         weights and plot the cost and the components;
      4. average the Pearson correlation between the coefficients of every
         pair of neurons, grouped by their distance, and plot it against
         hard-coded hypothetical values.
    """

    # get the folders in "saved" and select one
    base_path = os.path.dirname(__file__)
    folder_path = os.path.join(base_path, "saved")
    folders = os.listdir(folder_path)
    # NOTE(review): hard-coded index into an os.listdir() result, whose order
    # is arbitrary; the original comments mention both "most recent" and
    # "-1" -- confirm which folder is actually wanted.
    folder = folders[3]

    # load in activation data
    print("loading in the data...")
    file_path = os.path.join(folder_path, folder, "concatenated_activations.mat")
    data = h5py.File(file_path, 'r')['master']
    data = np.array(data)
    data = data.T
    print(data.shape)
    # TODO: scale and normalize data

    # load in data labels
    file_path = os.path.join(base_path, "data", "CIFAR_data.mat")
    train_labels = loadmat(file_path)['y']

    # augment training_labels to account for extra examples in image-space
    y_labels = numpy.matlib.repmat(train_labels, 1, data.shape[2]).reshape((data.shape[0] * data.shape[2], 1))

    # convert labels to binary vector
    lb = LabelBinarizer()
    lb.fit(train_labels)
    y_labels = lb.transform(y_labels)

    # perform neuron-wise regularized linear regression to obtain coefficients
    print("performing neuron-wise regularized linear regression...")
    neurons = data.shape[1]
    classes = 10
    coefficients = np.zeros((neurons, classes))
    for neuron in range(neurons):
        print(neuron)
        x = data[:, neuron, :].reshape(data.shape[0] * data.shape[2], 1)
        clf = Ridge(alpha=1.0)
        # the binarized labels are the regressors, the activations the target
        clf.fit(y_labels, x)
        coefficients[neuron, :] = clf.coef_

    # save the coefficients
    c = {'coefficients': coefficients}
    coefficient_path = os.path.join(folder_path, folder, "coefficients.mat")
    savemat(coefficient_path, c)

    # visualize histogram of coefficients
    pl.hist(np.abs(coefficients.flatten()), bins=30)
    pl.title('Frequency Distribution of Coefficient Values')
    pl.xlabel('Coefficient Value')
    pl.ylabel('Frequency')
    pl.show()

    # todo: find the N sparse filters from the data
    model = ['SparseFilter']
    n_filters = 10
    input_dim = coefficients.shape[1]
    dimensions = ([n_filters, input_dim],)  # number of filters equals number of classes
    pool = None
    group = None
    step = None
    learn_rate = .001
    opt = 'GD'
    convolution = 'n'
    test = 'n'
    batch_size = 1000
    random = 'n'
    weights = None
    iterations = 1000
    channels = 1
    # floor division: the batch count is used as a loop bound
    n_batches = coefficients.shape[0] // batch_size
    if n_batches == 0:
        n_batches = 1

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=model,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test,
        batch_size=batch_size,
        random=random,
        weights=weights
    )

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(np.float32(coefficients))

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    layer = None
    for l in range(model.n_layers):

        layer = 'layer' + str(l)
        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in range(iterations):

            # go though [mini]batches
            for batch_index in range(n_batches):

                c, w = train[l](index=batch_index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost[layer] = cost_layer
        weights[layer] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # order the components based on their activations (proxy for amount of variance explained)
    activations, _, _, _, _, _ = outputs[0](np.float32(coefficients))
    activations_summed = np.sum(np.abs(activations), axis=1)
    index = np.argsort(activations_summed)
    weights[layer] = weights[layer][index]

    # save the components (each column represents a component with each element the value for each object category)
    components_path = os.path.join(folder_path, folder, 'weights.mat')
    savemat(components_path, weights)

    # plot the cost function over time
    visualize.plotCost(cost)

    # visualize the components with respect to the object categories
    pl.imshow(weights[layer], interpolation='nearest')
    pl.title('Sparse Filtering Components')
    pl.xlabel('Weights')
    pl.ylabel('Filters')
    pl.xticks(np.arange(1, 10, 10))
    pl.yticks(np.arange(1, 10, 10))
    pl.show()

    # project the components back onto the cortical sheet (i.e., the dot product between each neuron's model
    # coefficients and each component)
    projections = activations
    visualize.drawplots(projections.T, color='gray', convolution=convolution,
                        pad=0, examples=None, channels=channels)

    # todo: compare the similarity of adjacent neurons of different distances and visualize
    distance_measure = 'cityblock'
    # cdist returns a 1x1 array here; reduce it to a plain int so that it can
    # be used as an array dimension
    max_distance = int(cdist(np.atleast_2d([0, 0]),
                             np.atleast_2d([np.sqrt(neurons), np.sqrt(neurons)]),
                             distance_measure)[0, 0])
    continuity_data = np.zeros((1, max_distance))
    distances = distMat(neurons, d=neurons * 100, kind=distance_measure)
    pl.imshow(distances)
    pl.show()
    divisor = np.zeros((1, max_distance))
    for i in range(neurons):
        for j in range(neurons):
            correlation = pearsonr(coefficients[i, :].T, coefficients[j, :].T)
            raw_distance = distances[i, j]
            print("%s %s" % (raw_distance, correlation))
            # the distance doubles as a bin index, so it must be an integer
            d = int(raw_distance)
            continuity_data[0, d] += correlation[0]
            divisor[0, d] += 1

    # bins that received no pair divide 0 by 0 and are dropped as NaN
    correlation_averages = continuity_data / divisor
    correlation_averages = correlation_averages[~np.isnan(correlation_averages)]
    # todo: allow computation of std (placeholder values are used for now)
    temp_std = np.linspace(.2, .1, len(correlation_averages))
    print(temp_std)
    print(correlation_averages)
    hypothetical_averages = [1., 0.7, 0.5, 0.4, 0.28, 0.21, 0.15, 0.09, 0.07, 0.05]
    hypothetical_stds = np.linspace(.07, .1, len(correlation_averages) - 1)
    fig = visualize.plot_mean_std(correlation_averages[0:10], temp_std[0:10], hypothetical_averages, hypothetical_stds)
    fig.show()

Example #6

Show file

File: ELPS.py Project: dlacombejr/sparse_filtering

    os.remove(directory_name + '/weights.mat')

def _persist(suffix, payload):
    # write the payload below directory_name; when running on aws, push the
    # file to the bucket through key `k` and drop the local copy
    destination = directory_name + suffix
    savemat(destination, payload)
    if aws == 'y':
        k.key = destination
        k.set_contents_from_filename(destination)
        os.remove(destination)


# save the cost functions, the target and the inhibition (in that order)
_persist('/cost.mat', {'cost': cost_layer})
_persist('/target.mat', {'target': target})
_persist('/inhibition.mat', {'inhibition': a_out})

# plot the cost
c = {'layer0': cost_layer}
visualize.plotCost(c)

# visualize the receptive fields of the first layer
visualize.drawplots(
    weight.T,
    color='gray',
    convolution='n',
    pad=0,
    examples=None,
    channels=1,
)

Example #7

Show file

def _quadrant_pooled_features(layer_fn, quad_pool, inputs, n_filters,
                              batch_size):
    """Evaluate `inputs` in mini-batches and quadrant-pool each batch.

    Args:
        layer_fn: callable applied to a slice of `inputs`; element 0 of its
            return value is passed to `quad_pool`.
        quad_pool: callable whose result is stored into a
            (batch, n_filters, 2, 2) slice of the feature array.
        inputs: array whose first axis indexes examples.
        n_filters: number of feature maps per example.
        batch_size: number of examples per call to `layer_fn`.

    Returns:
        A 2-D array with one flattened (n_filters * 2 * 2) row per example.
        Examples past the last full batch are never evaluated and stay zero.
    """
    features = np.zeros((inputs.shape[0], n_filters, 2, 2))
    for batch in xrange(inputs.shape[0] // batch_size):
        print("for batch %d" % batch)
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        layer_output = layer_fn(inputs[batch_start:batch_end])[0]
        features[batch_start:batch_end] = quad_pool(layer_output)
    return features.reshape(
        features.shape[0],
        features.shape[1] * features.shape[2] * features.shape[3])


def main():
    """Train a sparse-filtering network on patches, save it, and test it.

    Steps, all driven by the hard-coded parameters at the top:
      1. load the 'patches' data set from ./data/<filename> (HDF5) and
         centre, scale and whiten each colour channel;
      2. build an `sf.Network` and train every layer on random mini-batches;
      3. write the pickled model, the weights, a log file and (when
         `verbosity >= 1`) per-batch activations to a time-stamped folder
         under ./saved;
      4. when `verbosity == 2`, plot the cost and the first-layer weights;
      5. when `test_model == 'y'`, reuse the learned weights in a
         convolutional network, extract quadrant-pooled features for the
         STL-10 train/test files and fit a linear SVM per pre-defined fold,
         logging the accuracy and saving it to 'accuracy'.
    """

    # define global parameters
    model_type = ['SparseFilter']
    convolution = 'n'
    filename = 'unlabeled_10000.mat'
    # 'STL_10_unlabeled_patches.h5'  # _sample10'  # 'patches.mat'  # LCN  # _raw10  # _raw1000
    channels = 3
    patch_size = 14
    n_filters = 1600  # 1600
    dimensions = ([n_filters, patch_size * patch_size * channels],
                  )  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]  # [50]  # [1]
    verbosity = 2
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 1000  # 360  # 8000  # todo: figure out why large batches produce nan cost

    # load in data; the HDF5 file is closed as soon as the array is copied out
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    with h5py.File(file_path, 'r') as patch_file:
        data = np.array(patch_file['patches'])
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    channel_length = patch_size**2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8
        data[start:end] = whiten(data[start:end].T).T

    data = np.float32(data.T)

    # number of full [mini]batches; a trailing partial batch is ignored
    n_batches = data.shape[0] // batch_size

    # construct the network
    print("building model...")
    model = sf.Network(model_type=model_type,
                       weight_dims=dimensions,
                       p=pool,
                       group_size=group,
                       step=step,
                       lr=learn_rate,
                       opt=opt,
                       c=convolution,
                       test=test_model,
                       batch_size=batch_size,
                       random='y',
                       weights=None)

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        # NOTE(review): every layer uses iterations[0]; confirm whether
        # iterations[l] is intended once more than one layer is trained.
        for epoch in xrange(iterations[0]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model (makedirs also creates ./saved)
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use; HIGHEST_PROTOCOL is a binary format, so
    # the file must be opened in binary mode
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)

    # create log file
    ex = data.shape[0] if examples is None else examples
    with open(directory_name + "/log.txt", "wb") as log_file:
        for m, layer_type in enumerate(model_type):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, layer_type, dimensions[m], iterations[m]))
            # compare the layer's type name, not the Network object itself
            if layer_type in ('GroupSF', 'GroupConvolutionalSF'):
                log_file.write(" Groups: %d \n Step: %d" % (group, step))

        log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                       (filename, ex, whitening))
        log_file.write('\nElapsed training time: %f' % elapsed)

    # get variables and save
    if verbosity >= 1:

        # activations are taken from the last layer of the network
        last_layer = model.n_layers - 1
        for batch in xrange(n_batches):

            begin = batch * batch_size
            end = begin + batch_size
            # only the normalized (f_hat) and raw (f) activations are saved
            f_hat, _rec, _err, _f_hat_shuffled, f, _pooled = \
                outputs[last_layer](data[begin:end])

            batch_title = 'layer' + str(last_layer) + '_batch' + str(batch)

            # save normalized and raw activations separately for each batch
            savemat(
                directory_name + '/activations_norm_' + batch_title + '.mat',
                {batch_title: f_hat})
            savemat(
                directory_name + '/activation_raw_' + batch_title + '.mat',
                {batch_title: f})

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T,
                            color='gray',
                            convolution=convolution,
                            pad=0,
                            examples=None,
                            channels=channels)

    # ================================ Test the Model ================================

    # test the model if evaluating classification performance
    if test_model == 'y':
        print('testing...')

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # set some new local parameters (these override the training values
        # for the convolutional test network built below)
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        model_type = ["ConvolutionalSF"]
        dimensions = ([1, n_filters, patch_size, patch_size], )
        convolution = 'y'
        batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print("loading in training and test data...")
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)['X']
        # train.mat holds both the labels and the pre-defined fold indices
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_mat = loadmat(file_path)
        train_labels = train_mat['y']

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)['y']

        # read in the pre-defined fold indices and make them zero-indexed;
        # subtracting an int keeps integer indices usable for indexing
        fold_indices = train_mat['fold_indices']
        fold_indices -= 1

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape(
            (-1, channels, patch_size, patch_size))
        old_weights = theano.shared(
            old_weights.astype(dtype=theano.config.floatX))
        test_network = sf.Network(model_type=model_type,
                                  weight_dims=dimensions,
                                  p=pool,
                                  group_size=group,
                                  step=step,
                                  lr=learn_rate,
                                  opt=opt,
                                  c=convolution,
                                  test=test_model,
                                  batch_size=batch_size,
                                  random='y',
                                  weights=old_weights)

        # compile the training, output, and test functions for the network
        print("compiling theano functions...")
        _, _, test = test_network.training_functions(train_data)

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None
        # range(test_network.n_layers + 1) would include the pixel baseline
        # (layer 0); skipping pixels for now
        for layer in range(1, 2):

            # create dictionary for layer
            layer_key = 'layer' + str(layer)
            accuracy[layer_key] = {}

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                fold_key = 'fold' + str(fold)

                # get fold data
                train_data_fold = np.squeeze(train_data[fold_indices[0][fold]])
                train_labels_fold = np.squeeze(
                    train_labels[fold_indices[0][fold]])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0], test_data.shape[1] *
                            test_data.shape[2] * test_data.shape[3])

                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0], train_data_fold.shape[1] *
                        train_data_fold.shape[2] * train_data_fold.shape[3])

                # hidden layers
                else:

                    # get the quadrant-pooled output of the current layer
                    # for the test / training data
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print("getting test data...")
                        test_input = _quadrant_pooled_features(
                            test[layer - 1], quad_pool, test_data,
                            n_filters, batch_size)

                    print("getting training data...")
                    train_input = _quadrant_pooled_features(
                        test[layer - 1], quad_pool, train_data_fold,
                        n_filters, batch_size)

                # normalize each example's feature vector (axis=1) to
                # zero-mean and unit-variance
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(
                    train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                predictions = clf.predict(test_input)
                fold_accuracy = clf.score(test_input, test_labels[0:examples])
                accuracy[layer_key][fold_key] = fold_accuracy

                # display results and log them
                message = (
                    "Accuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer, fold_accuracy))
                print(message)
                # TODO: plot the confusion matrix when verbosity == 2
                cm = confusion_matrix(test_labels[0:examples], predictions)
                with open(directory_name + "/log.txt", "a") as log_file:
                    log_file.write("\n" + message)

        # save the test results
        savemat('accuracy', accuracy)

Example #8

Show file

File: test.py Project: dlacombejr/sparse_filtering

def main():
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1
        -------------------------------------------------------------------------------------------------------------
        ''')
    )
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type")
    parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename")
    parser.add_argument("-d", "--dimensions", type=int, nargs='+', default=([100, 256]),
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None, help="group size")
    parser.add_argument("-s", "--step", type=int, default=None, help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'")
    parser.add_argument("-r", "--random", default='n', help="type of batches: random = 'y'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data.T).T
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        # data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :].
                                                                     reshape((data.shape[0], 1,
                                                                              data.shape[2],
                                                                              data.shape[3])),
                                                                     kernel_shape=9), (
                                                    data.shape[0],
                                                    data.shape[2],
                                                    data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None

    # assert that the number of neurons in each layer is a perfect square
    for layer in xrange(len(args.dimensions)):
        assert np.sqrt(args.dimensions[layer][0]) % np.floor(np.sqrt(args.dimensions[layer][0])) == 0

    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=args.model, weight_dims=args.dimensions, p=args.pool, group_size=args.group,
        step=args.step, lr=args.learn_rate, opt=args.opt, c=args.convolution, test=args.test,
        batch_size=args.batch_size, random=args.random, weights=None
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l], 'disp': True})

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time        
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if args.aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")  # todo: create log file by looping through args
    # for arg in args:
    #     log_file.write(
    #         args.
    #     )
    for m in range(len(args.model)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m,
                                                                                    args.model[m],
                                                                                    args.dimensions[m],
                                                                                    args.iterations[m])
        )
        if args.model == 'GroupSF' or args.model == 'GroupConvolutionalSF':
            log_file.write(
                " Groups: %d \n Step: %d" % (args.group, args.step)
            )
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples

    log_file.write(
        " Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening)
    )
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}

        # for l in xrange(len(args.dimensions)):

            # activations_norm['layer' + str(l)] = {}
            # activations_raw['layer' + str(l)] = {}
            # activations_shuffled['layer' + str(l)] = {}
            # reconstruction['layer' + str(l)] = {}
            # error_recon['layer' + str(l)] = {}
            # pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
            # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
            # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
            # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
            # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
            # pooled['layer' + str(l)]['batch' + str(batch)] = p

            # define [mini]batch title
            batch_title = 'layer' + str(l) + '_batch' + '%03d' % batch

            # define norm and raw file names
            norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat'
            raw_file_name = directory_name + '/activation_raw_' + batch_title + '.mat'

            activations_norm[batch_title] = f_hat
            activations_raw[batch_title] = f
            activations_shuffled[batch_title] = f_hat_shuffled
            reconstruction[batch_title] = err
            error_recon[batch_title] = rec
            pooled[batch_title] = p

            # save model as well as weights and activations separately
            savemat(norm_file_name, activations_norm)
            # savemat(raw_file_name, activations_raw)

            if args.aws == 'y':

                k.key = norm_file_name
                k.set_contents_from_filename(norm_file_name)
                os.remove(norm_file_name)

                # k.key = raw_file_name
                # k.set_contents_from_filename(raw_file_name)
                # os.remove(raw_file_name)

        # savemat(directory_name + '/weights.mat', weights)
        # if args.aws == 'y':
        #     k.key = directory_name + '/weights.mat'
        #     k.set_contents_from_filename(directory_name + '/weights.mat')
        #     os.remove(directory_name + '/weights.mat')

        #     # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        #     f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
        #
        #     activations_norm['layer' + str(l)] = f_hat
        #     activations_raw['layer' + str(l)] = f
        #     activations_shuffled['layer' + str(l)] = f_hat_shuffled
        #     reconstruction['layer' + str(l)] = err
        #     error_recon['layer' + str(l)] = rec
        #     pooled['layer' + str(l)] = p
        #
        # # save model as well as weights and activations separately
        # savemat(directory_name + '/weights.mat', weights)
        # savemat(directory_name + '/activations_norm.mat', activations_norm)
        # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # output helper file for concatenating activations
    helper = {'batches': n_batches, 'output_size': f_hat.shape}
    helper_file_name = directory_name + '/helper.mat'
    savemat(helper_file_name, helper)
    if args.aws == 'y':
        k.key = helper_file_name
        k.set_contents_from_filename(helper_file_name)
        os.remove(helper_file_name)

    # get data if not on AWS
    if args.aws == 'n':
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data)
        activations_norm = {"layer0": f_hat}

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution,
                            pad=0, examples=None, channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in xrange(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(args.dimensions[l][0],
                                                                             data.shape[0] *
                                                                             activations_shuffled[layer].shape[2] *
                                                                             activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution,
                                pad=1, examples=100)

        # # visualize reconstruction capabilities
        # if args.convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
        # elif args.convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=args.convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)
            # print activations_raw['layer0']

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(pooled['layer0'][0, :, :, :].reshape(1,
                                                                                     pooled['layer0'].shape[1],
                                                                                     pooled['layer0'].shape[2],
                                                                                     pooled['layer0'].shape[3]),
                                                dim=dim / 2)

            # visualize an example of a LCNed convolved image after max pooling
            # temp = activations_raw['layer0']    #[0, :, :, :]
            temp = pooled['layer0']    #[0, :, :, :]
            # print temp.shape
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape((1, 1, dim / 2, dim / 2)), kernel_shape=5)
            # temp = scaling.LCNinput(temp, kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=dim / 2)
            # print temp

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            test_data = np.float32(test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                                     int(np.sqrt(test_data.shape[1]))))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                               test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(data.shape[0], data.shape[1] *
                                           data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(test_input[0].shape[0], test_input[0].shape[1] *
                                                   test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                                  train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write(
                "\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)
            )
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()

Example #9

Show file

File: SF_test.py Project: dlacombejr/sparse_filtering

def _quadrant_pooled_features(layer_fn, images, batch_size, n_filters, quad_pool):
    """Push `images` through `layer_fn` in full batches and quadrant-pool each batch.

    Args:
        layer_fn: compiled network function; the first element of its return
            value is handed to `quad_pool`.
        images: array of inputs, indexed by example along the first axis.
        batch_size: number of examples passed to `layer_fn` per call.
        n_filters: size of the second axis of the pooled output.
        quad_pool: callable that reduces one batch of layer output to an
            array assignable to a (batch, n_filters, 2, 2) slice.

    Returns:
        A 2-D array with one flattened row of pooled features per example.
    """
    features = np.zeros((images.shape[0], n_filters, 2, 2))

    # NOTE: only full batches are processed; any trailing examples beyond the
    # last full batch keep their all-zero rows.
    for batch in xrange(images.shape[0] // batch_size):
        print("for batch %d" % batch)
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        layer_output = layer_fn(images[batch_start:batch_end])[0]
        features[batch_start:batch_end] = quad_pool(layer_output)

    return features.reshape(
        features.shape[0], features.shape[1] * features.shape[2] * features.shape[3]
    )


def main():
    """Train a sparse filtering network on patches and evaluate it with linear SVMs.

    The script:
      1. loads the "patches" dataset from data/<filename> via h5py, then
         standardizes and whitens each channel;
      2. trains every layer of an `sf.Network` on random mini-batches;
      3. writes the pickled model, weights, a log file and per-batch
         activations to a time-stamped folder under ./saved;
      4. optionally plots the cost and the first-layer receptive fields;
      5. builds a convolutional network from the learned weights and, for each
         predefined fold, trains and scores a linear SVM on quadrant-pooled
         features, appending the accuracies to the log and saving them to
         "accuracy".
    """

    # define global parameters
    model_type = ["SparseFilter"]
    convolution = "n"
    filename = "unlabeled_10000.mat"
    # 'STL_10_unlabeled_patches.h5'  # _sample10'  # 'patches.mat'  # LCN  # _raw10  # _raw1000
    channels = 3
    patch_size = 14
    n_filters = 1600  # 1600
    dimensions = ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]  # [50]  # [1]
    verbosity = 2
    opt = "GD"
    whitening = "y"  # NOTE: only written to the log; whitening below is unconditional
    test_model = "y"
    examples = None
    batch_size = 1000  # 360  # 8000  # todo: figure out why large batches produce nan cost

    # load in data; np.array copies the dataset so the file can be closed right away
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    with h5py.File(file_path, "r") as patch_file:
        data = np.array(patch_file["patches"])
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    channel_length = patch_size ** 2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8
        data[start:end] = whiten(data[start:end].T).T

    data = np.float32(data.T)

    # determine number of (full) batches
    n_batches = data.shape[0] // batch_size

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=model_type,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test_model,
        batch_size=batch_size,
        random="y",
        weights=None,
    )

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost["layer" + str(l)] = cost_layer
        weights["layer" + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print("Elapsed training time: %f" % elapsed)

    # create sub-folder for saved model; makedirs also creates ./saved when it
    # is missing, so a finished training run is not lost to a missing folder
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use (binary mode is required by HIGHEST_PROTOCOL)
    full_path = directory_name + "/model.pkl"
    with open(full_path, "wb") as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + "/weights.mat", weights)

    # create log file
    ex = data.shape[0] if examples is None else examples
    with open(directory_name + "/log.txt", "wb") as log_file:
        for m in range(len(model_type)):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, model_type[m], dimensions[m], iterations[m])
            )
            # the grouping parameters only apply to the grouped model types
            if model_type[m] in ("GroupSF", "GroupConvolutionalSF"):
                log_file.write(" Groups: %d \n Step: %d" % (group, step))

        log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening))
        log_file.write("\nElapsed training time: %f" % elapsed)

    # get the last layer's activations and save them one file per batch
    if verbosity >= 1:

        last_layer = model.n_layers - 1
        for batch in xrange(n_batches):

            begin = batch * batch_size
            end = begin + batch_size
            f_hat, _, _, _, f, _ = outputs[last_layer](data[begin:end])

            # the same title is used as the .mat variable name and in the file name
            batch_title = "layer" + str(last_layer) + "_batch" + str(batch)
            savemat(directory_name + "/activations_norm_" + batch_title + ".mat", {batch_title: f_hat})
            savemat(directory_name + "/activation_raw_" + batch_title + ".mat", {batch_title: f})

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == "GD":
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(
            weights["layer0"].T, color="gray", convolution=convolution, pad=0, examples=None, channels=channels
        )

    """ ================================ Test the Model ======================================= """

    # test the model if evaluating classification performance
    if test_model == "y":
        print("testing...")

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        model_type = ["ConvolutionalSF"]
        dimensions = ([1, n_filters, patch_size, patch_size],)
        convolution = "y"
        batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print("loading in training and test data...")
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)["X"]
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_labels = loadmat(file_path)["y"]

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)["X"]
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)["y"]

        # read in the pre-defined fold indices
        file_path = os.path.join(base_path, "data", "train.mat")
        fold_indices = loadmat(file_path)["fold_indices"]
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape((-1, channels, patch_size, patch_size))
        old_weights = theano.shared(old_weights.astype(dtype=theano.config.floatX))
        test_network = sf.Network(
            model_type=model_type,
            weight_dims=dimensions,
            p=pool,
            group_size=group,
            step=step,
            lr=learn_rate,
            opt=opt,
            c=convolution,
            test=test_model,
            batch_size=batch_size,
            random="y",
            weights=old_weights,
        )

        # compile the training, output, and test functions for the network
        print("compiling theano functions...")
        _, _, test = test_network.training_functions(train_data)

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None  # confusion matrix of the most recent fold; not plotted at present
        for layer in range(1, 2):  # range(test_network.n_layers + 1):  # skipping pixels for now

            # create dictionary for layer
            layer_key = "layer" + str(layer)
            accuracy[layer_key] = {}

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                fold_key = "fold" + str(fold)

                # get fold data
                train_data_fold = np.squeeze(train_data[fold_indices[0][fold]])
                train_labels_fold = np.squeeze(train_labels[fold_indices[0][fold]])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]
                        )

                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0],
                        train_data_fold.shape[1] * train_data_fold.shape[2] * train_data_fold.shape[3],
                    )

                # hidden layers
                else:

                    # get the output of the current layer in the model given the training / test data and then reshape
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print("getting test data...")
                        test_input = _quadrant_pooled_features(
                            test[layer - 1], test_data, batch_size, n_filters, quad_pool
                        )

                    print("getting training data...")
                    train_input = _quadrant_pooled_features(
                        test[layer - 1], train_data_fold, batch_size, n_filters, quad_pool
                    )

                # normalize each example's feature vector (zero-mean and unit-variance along axis 1)
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                predictions = clf.predict(test_input)
                fold_accuracy = clf.score(test_input, test_labels[0:examples])
                accuracy[layer_key][fold_key] = fold_accuracy

                # display results and log them
                result_line = "Accuracy of the classifier for fold %d at layer %1d: %0.4f" % (
                    fold,
                    layer,
                    fold_accuracy,
                )
                print(result_line)
                cm = confusion_matrix(test_labels[0:examples], predictions)
                with open(directory_name + "/log.txt", "a") as log_file:
                    log_file.write("\n" + result_line)

        # save the test results
        savemat("accuracy", accuracy)

Example #10

Show file

File: tensorflow_SF.py Project: dlacombejr/sparse_filtering

def main():
    """Train a sparse filter with TensorFlow gradient descent and plot the results.

    Loads 'X' from data/patches.mat, removes the mean along axis 0, minimizes
    the summed output of `SparseFilter.feed_forward()` for `iterations[0]`
    steps, then plots the cost history and the learned weights.
    """

    # define global parameters
    filename = 'patches.mat'
    n_filters = 100
    learn_rate = 0.001
    iterations = [200]

    # load in data and preprocess
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    data -= data.mean(axis=0)
    data = np.float32(data.T)

    # construct the network
    print("building model...")
    weights = tf.Variable(tf.random_uniform([n_filters, data.shape[1]]))
    model = SparseFilter(weights, data)

    # define loss, optimizer, and train function
    loss = tf.reduce_sum(model.feed_forward())
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train = optimizer.minimize(loss)

    # initialize all the variables
    init = tf.initialize_all_variables()

    # run the session; the context manager closes it (releasing its resources)
    # as soon as the learned weights have been fetched
    cost_running = []
    with tf.Session() as sess:
        sess.run(init)

        # train the sparse filtering network
        print("training network...")
        t = time.time()

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            sess.run(train)
            current_cost = sess.run(loss)
            cost_running.append(current_cost)
            print("Cost at epoch %i: %0.4f" % (epoch, current_cost))

        # calculate and display elapsed training time
        elapsed = time.time() - t
        print('Elapsed training time: %f' % elapsed)

        # pull the learned weights out of the graph before the session closes
        weights_final = sess.run(weights)

    # plot the cost function over time
    c = {'layer0': cost_running}
    visualize.plotCost(c)

    # visualize the receptive fields of the first layer
    print(weights_final.shape)
    visualize.drawplots(weights_final.T,
                        color='gray',
                        convolution='n',
                        pad=0,
                        examples=None,
                        channels=1)