def main():
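    """Load the most recently saved weights and visualize the first-layer
    receptive fields and the weight histogram of each layer."""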

    # get the folders in "saved" and select most recent
    base_path = os.path.dirname(__file__)
    folder_path = os.path.join(base_path, "saved")
    folders = os.listdir(folder_path)
    folder = folders[-1]

    # load in the saved weight data
    file_path = os.path.join(folder_path, folder, "weights.mat")
    data = loadmat(file_path)

    # visualize the receptive fields of the first layer
    visualize.drawplots(data['layer0'].T, color='gray', convolution='y',
                        pad=0, examples=None, channels=1)

    # visualize the distribution of weights
    for l in xrange(3):

        layer = 'layer' + str(l)
        layer_weights = data[layer]

        pl.subplot(2, 2, l + 1)
        pl.hist(layer_weights.flatten(), bins=50)
        pl.xlabel("Weights")
        pl.ylabel("Count")
        pl.title("Feature Weight Histogram for " + layer)

    pl.show()
def main():
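    """Train a single-layer sparse filtering network on image patches with
    TensorFlow and visualize the cost curve and learned receptive fields."""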

    # define global parameters
    filename = 'patches.mat'
    n_filters = 100
    learn_rate = 0.001
    iterations = [200]

    # load in data and preprocess
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    data -= data.mean(axis=0)
    data = np.float32(data.T)

    # construct the network
    print "building model..."
    weights = tf.Variable(tf.random_uniform([n_filters, data.shape[1]]))
    model = SparseFilter(weights, data)

    # define loss, optimizer, and train function
    loss = tf.reduce_sum(model.feed_forward())
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train = optimizer.minimize(loss)

    # initialize all the variables
    init = tf.initialize_all_variables()

    # run the session
    sess = tf.Session()
    sess.run(init)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost_running = []

    # iterate over training epochs
    for epoch in xrange(iterations[0]):

        sess.run(train)
        current_cost = sess.run(loss)
        cost_running.append(current_cost)
        print("Cost at epoch %i: %0.4f" % (epoch, current_cost))

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # plot the cost function over time
    c = {'layer0': cost_running}
    visualize.plotCost(c)

    # visualize the receptive fields of the first layer
    weights_final = sess.run(weights)
    print weights_final.shape
    visualize.drawplots(weights_final.T, color='gray', convolution='n',
                        pad=0, examples=None, channels=1)
Example #4
        weights['layer' + str(l)] = w

    # save model to dictionary
    models[model_type[0]] = model

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # plot the cost function over time
    visualize.plotCost(cost)

    # visualize the receptive fields of the first layer
    visualize.drawplots(weights['layer0'].T,
                        color='gray',
                        convolution=convolution,
                        pad=0,
                        examples=None,
                        channels=channels)

    # get activations of first layer and save in dictionary
    f_hat, _, _, _, _, _ = outputs[0](data)
    f_hats[model_type[0]] = f_hat

# project activations of both networks up using local connections
group_matrix = connections.gMatToroidal(n_filters,
                                        topographic_parameters[0],
                                        topographic_parameters[1],
                                        centered='n')
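# group_matrix (from gMatToroidal) encodes each unit's local neighborhood on a toroidal (wrap-around) topographic sheet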
gf_hats = {}
for model in model_type_meta:
    model = model[0]
pl.subplot(2, 4, 8)
pl.scatter(horizontal_vectors[:, 0], horizontal_vectors[:, 1], s=1)
pl.xlim([0, 16])
pl.ylim([0, 16])
pl.gca().set_aspect('equal', adjustable='box')
pl.title('location')
# pl.show()


pl.show()


###########
# visualize the original weights and the learned Gabor patches

# read in sample weights
file_path = os.path.join(folder_path, "weights.mat")
weights = loadmat(file_path)['layer0']  # [neurons, weights]

# visualize the weights
drawplots(weights.T, color='gray', convolution='n', pad=0, examples=None, channels=1)

# read in sample Gabor patches
file_path = os.path.join(folder_path, "gabors2.mat")
gabors = loadmat(file_path)['gabors']  # [neurons, weights]
gabors = gabors.reshape((gabors.shape[0], gabors.shape[1] * gabors.shape[2]))

# visualize the Gabor patches
drawplots(gabors.T, color='gray', convolution='n', pad=0, examples=None, channels=1)

Example #6
# additional imports inferred from the code below (os, numpy, pylab, and the sparse_filtering module are assumed)
import os

import numpy as np
import pylab as pl

import sparse_filtering
from utilities.visualize import plotCost, drawplots
from scipy.io import loadmat


#################### MAIN SCRIPT #########################

# load data, normalize, and convert to float32
basepath = os.path.dirname(__file__)
filename = 'patches.mat'
filepath = os.path.join(basepath, "data", filename)
data = loadmat(filepath)['X']
data -= data.mean(axis=0)
data = np.float32(data)

# construct the network
wDims = [[100, 256]]
model = sparse_filtering.network(model_type='SF', wDims=wDims, p=None,
                                 gMat=None, gSize=None, step=None, lr=0.01)
train = model.training_functions(data)

# train the sparse filtering network
maxIter = 100
cost_master = []
for i in range(maxIter):
    cost, w = train[0]()
    cost_master.append(cost)
    print("The cost at iteration %i: %f" %(i, cost))

plotCost(cost_master)
drawplots(w.T) 
pl.title('location')
# pl.show()

pl.show()

###########
# visualize the original weights and the learned Gabor patches

# read in sample weights
file_path = os.path.join(folder_path, "weights.mat")
weights = loadmat(file_path)['layer0']  # [neurons, weights]

# visualize the weights
drawplots(weights.T,
          color='gray',
          convolution='n',
          pad=0,
          examples=None,
          channels=1)

# read in sample Gabor patches
file_path = os.path.join(folder_path, "gabors2.mat")
gabors = loadmat(file_path)['gabors']  # [neurons, weights]
gabors = gabors.reshape((gabors.shape[0], gabors.shape[1] * gabors.shape[2]))

# visualize the Gabor patches
drawplots(gabors.T,
          color='gray',
          convolution='n',
          pad=0,
          examples=None,
          channels=1)
Example #8
def main():
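    """Parse command-line options, preprocess the chosen data set, train a sparse
    filtering network (gradient descent or L-BFGS), save the model, weights, and
    log (optionally to S3), and optionally visualize the results and test
    classification performance with a linear SVM."""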
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1

        In the convolutional case, the extra "1" is added automatically for broadcasting.
        -------------------------------------------------------------------------------------------------------------
        '''))
    parser.add_argument("-m",
                        "--model",
                        default=['SparseFilter'],
                        nargs='+',
                        help="the model type")
    parser.add_argument("-c",
                        "--convolution",
                        default="n",
                        help="convolution, yes or no")
    parser.add_argument("-f",
                        "--filename",
                        default="patches.mat",
                        help="the data filename")
    parser.add_argument("-d",
                        "--dimensions",
                        type=int,
                        nargs='+',
                        default=[100, 256],
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p",
                        "--pool",
                        type=int,
                        nargs='+',
                        default=None,
                        help="pooling dimensions")
    parser.add_argument("-g",
                        "--group",
                        type=int,
                        default=None,
                        help="group size")
    parser.add_argument("-s",
                        "--step",
                        type=int,
                        default=None,
                        help="step size")
    parser.add_argument("-l",
                        "--learn_rate",
                        type=float,
                        default=.001,
                        help="learning rate")
    parser.add_argument("-i",
                        "--iterations",
                        type=int,
                        nargs='+',
                        default=[100],
                        help="number of iterations")
    parser.add_argument("-v",
                        "--verbosity",
                        type=int,
                        default=0,
                        help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o",
                        "--opt",
                        default="GD",
                        help="optimization method: GD or L-BFGS")
    parser.add_argument("-w",
                        "--whitening",
                        default='n',
                        help="whitening: 'y' or 'n'")
    parser.add_argument("-t",
                        "--test",
                        default='n',
                        help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a",
                        "--channels",
                        type=int,
                        default=1,
                        help="number of channels in data")
    parser.add_argument("-e",
                        "--examples",
                        type=int,
                        default=None,
                        help="number of training examples")
    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=1000,
                        help="number of examples in [mini]batch")
    parser.add_argument("-z",
                        "--aws",
                        default='n',
                        help="run on aws: 'y' or 'n'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)
    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data)
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        # data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            data = np.float32(
                data.reshape(-1, 1, int(np.sqrt(data.shape[1])),
                             int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(
                data.reshape(-1, 1, int(np.sqrt(data.shape[1])),
                             int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(
                data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)),
                             int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(data[:, channel, :, :].reshape(
                        (data.shape[0], 1, data.shape[2], data.shape[3])),
                                     kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0, "batch size must evenly divide the number of examples"

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None
    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(model_type=args.model,
                       weight_dims=args.dimensions,
                       p=args.pool,
                       group_size=args.group,
                       step=args.step,
                       lr=args.learn_rate,
                       opt=args.opt,
                       c=args.convolution,
                       test=args.test,
                       batch_size=args.batch_size
                       )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" %
                          (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l],
                         model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B',
                         jac=True,
                         options={
                             'maxiter': args.iterations[l],
                             'disp': True
                         })

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'wb'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")
    for m in range(len(args.model)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, args.model[m], args.dimensions[m], args.iterations[m]))
        if args.model[m] == 'GroupSF' or args.model[m] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (args.group, args.step))
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                   (args.filename, ex, args.whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")
    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}

        # for l in xrange(len(args.dimensions)):

        # activations_norm['layer' + str(l)] = {}
        # activations_raw['layer' + str(l)] = {}
        # activations_shuffled['layer' + str(l)] = {}
        # reconstruction['layer' + str(l)] = {}
        # error_recon['layer' + str(l)] = {}
        # pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
            # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
            # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
            # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
            # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
            # pooled['layer' + str(l)]['batch' + str(batch)] = p
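
            # note: 'l' still holds the final layer index left over from the training loop above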

            activations_norm['layer' + str(l) + '_batch' + str(batch)] = f_hat
            activations_raw['layer' + str(l) + '_batch' + str(batch)] = f
            activations_shuffled['layer' + str(l) + '_batch' +
                                 str(batch)] = f_hat_shuffled
            reconstruction['layer' + str(l) + '_batch' + str(batch)] = err
            error_recon['layer' + str(l) + '_batch' + str(batch)] = rec
            pooled['layer' + str(l) + '_batch' + str(batch)] = p

            # save model as well as weights and activations separately
            savemat(
                directory_name + '/activations_norm_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_norm)
            savemat(
                directory_name + '/activation_raw_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_raw)

            if args.aws == 'y':

                k.key = directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + \
                    str(batch) + '.mat'
                k.set_contents_from_filename(directory_name +
                                             '/activations_norm_' + 'layer' +
                                             str(l) + '_batch' + str(batch) +
                                             '.mat')
                os.remove(directory_name + '/activations_norm_' + 'layer' +
                          str(l) + '_batch' + str(batch) + '.mat')

                k.key = directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + \
                    str(batch) + '.mat'
                k.set_contents_from_filename(directory_name +
                                             '/activation_raw_' + 'layer' +
                                             str(l) + '_batch' + str(batch) +
                                             '.mat')
                os.remove(directory_name + '/activation_raw_' + 'layer' +
                          str(l) + '_batch' + str(batch) + '.mat')

        # savemat(directory_name + '/weights.mat', weights)
        # if args.aws == 'y':
        #     k.key = directory_name + '/weights.mat'
        #     k.set_contents_from_filename(directory_name + '/weights.mat')
        #     os.remove(directory_name + '/weights.mat')

        #     # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        #     f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
        #
        #     activations_norm['layer' + str(l)] = f_hat
        #     activations_raw['layer' + str(l)] = f
        #     activations_shuffled['layer' + str(l)] = f_hat_shuffled
        #     reconstruction['layer' + str(l)] = err
        #     error_recon['layer' + str(l)] = rec
        #     pooled['layer' + str(l)] = p
        #
        # # save model as well as weights and activations separately
        # savemat(directory_name + '/weights.mat', weights)
        # savemat(directory_name + '/activations_norm.mat', activations_norm)
        # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T,
                            color='gray',
                            convolution=args.convolution,
                            pad=0,
                            examples=None,
                            channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in xrange(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(
                    args.dimensions[l][0],
                    data.shape[0] * activations_shuffled[layer].shape[2] *
                    activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(
                video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'],
                                color='gray',
                                convolution=args.convolution,
                                pad=1,
                                examples=100)

        # # visualize reconstruction capabilities
        # if args.convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
        # elif args.convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=args.convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'],
                                                dim=dim)
            # print activations_raw['layer0']

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(
                pooled['layer0'][0, :, :, :].reshape(
                    1, pooled['layer0'].shape[1], pooled['layer0'].shape[2],
                    pooled['layer0'].shape[3]),
                dim=dim / 2)

            # visualize an example of a LCNed convolved image after max pooling
            # temp = activations_raw['layer0']    #[0, :, :, :]
            temp = pooled['layer0']  #[0, :, :, :]
            # print temp.shape
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape(
                    (1, 1, dim / 2, dim / 2)),
                                                    kernel_shape=5)
            # temp = scaling.LCNinput(temp, kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=dim / 2)
            # print temp
    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            test_data = np.float32(
                test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                  int(np.sqrt(test_data.shape[1]))))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(
                    test_data.shape[0], test_data.shape[1] *
                    test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(
                    data.shape[0],
                    data.shape[1] * data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0], test_input[0].shape[1] *
                    test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0], train_input.shape[1] *
                    train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(
                train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" %
                  (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write("\nAccuracy of the classifier at layer %1d: %0.4f" %
                           (layer, accuracy))
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
                cost, weights = train(train_out[batch_begin:batch_end])
                print "Cost at iteration %d and batch %d for model " % (iteration, batch) + model + ": %d" % cost
            accuracy = float(np.mean(np.argmax(test_labels, axis=1) == predict(test_out)))
            print "Classification performance for model " + model + " at iteration %d: %f" % (
                iteration,
                accuracy
            )

    # save weights
    final_weights[model] = weights

# visualize the weights for each class / neuron across the cortical sheet
print "visualizing weights..."
if convolutional == 'n':
    for model in model_names:
        drawplots(final_weights[model].T, color='gray', convolution='n', pad=0, examples=None, channels=1)
elif convolutional == 'y':
    for model in model_names:
        for category in xrange(train_labels.shape[1]):
            w = final_weights[model][:, category]
            w = w.reshape(int(np.sqrt(w.shape[0])), int(np.sqrt(w.shape[0])))
            pl.subplot(2, 5, category + 1)
            pl.imshow(w)

        pl.title("Weight distributions for model " + model)
        pl.show()

# find optimal neuronal positions (for N random initial positions)
print "finding optimal neuronal positions..."
optimal_positions = {}
minimal_wiring_length = {}
def main():
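    """Regress each neuron's activations onto binarized CIFAR class labels, sparse
    filter the resulting coefficient matrix, and examine how the learned components
    and their correlations are distributed across the cortical sheet."""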

    # get the folders in "saved" and select most recent
    base_path = os.path.dirname(__file__)
    folder_path = os.path.join(base_path, "saved")
    folders = os.listdir(folder_path)
    folder = folders[3]  # select a specific folder (use -1 for the most recent)

    # load in activation data
    print "loading in the data..."
    file_path = os.path.join(folder_path, folder, "concatenated_activations.mat")
    # data = loadmat(file_path)['master']  # [examples, neurons, image-space]
    data = h5py.File(file_path, 'r')['master']
    data = np.array(data)
    data = data.T
    print data.shape
    # TODO: scale and normalize data

    # load in data labels
    file_path = os.path.join(base_path, "data", "CIFAR_data.mat")
    train_labels = loadmat(file_path)['y']

    # augment training_labels to account for extra examples in image-space
    y_labels = numpy.matlib.repmat(train_labels, 1, data.shape[2]).reshape((data.shape[0] * data.shape[2], 1))

    # convert labels to binary vector
    lb = LabelBinarizer()
    lb.fit(train_labels)
    y_labels = lb.transform(y_labels)

    # perform neuron-wise regularized linear regression to obtain coefficients
    print "performing neuron-wise regularized linear regression..."
    neurons = data.shape[1]
    classes = 10
    coefficients = np.zeros((neurons, classes))
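    # each row of `coefficients` will hold one neuron's ridge-regression weights over the 10 object classes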
    for neuron in xrange(data.shape[1]):
        print neuron
        x = data[:, neuron, :].reshape(data.shape[0] * data.shape[2], 1)
        clf = Ridge(alpha=1.0)
        clf.fit(y_labels, x)
        coefficients[neuron, :] = clf.coef_

    # save the coefficients
    c = {'coefficients': coefficients}
    coefficient_path = os.path.join(folder_path, folder, "coefficients.mat")
    savemat(coefficient_path, c)

    # visualize histogram of coefficients
    pl.hist(np.abs(coefficients.flatten()), bins=30)
    pl.title('Frequency Distribution of Coefficient Values')
    pl.xlabel('Coefficient Value')
    pl.ylabel('Frequency')
    pl.show()

    # todo: find the N sparse filters from the data
    model = ['SparseFilter']
    n_filters = 10
    input_dim = coefficients.shape[1]
    dimensions = ([n_filters, input_dim],)  # number of filters equals number of classes
    pool = None
    group = None
    step = None
    learn_rate = .001
    opt = 'GD'
    convolution = 'n'
    test = 'n'
    batch_size = 1000
    random = 'n'
    weights = None
    iterations = 1000
    channels = 1
    n_batches = coefficients.shape[0] / batch_size
    if n_batches == 0:
        n_batches = 1

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=model,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test,
        batch_size=batch_size,
        random=random,
        weights=weights
    )

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(np.float32(coefficients))

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    layer = None
    for l in xrange(model.n_layers):

        layer = 'layer' + str(l)
        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                c, w = train[l](index=batch_index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost[layer] = cost_layer
        weights[layer] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # order the components based on their activations (proxy for amount of variance explained)
    activations, _, _, _, _, _ = outputs[0](np.float32(coefficients))
    activations_summed = np.sum(np.abs(activations), axis=1)
    index = np.argsort(activations_summed)
    weights[layer] = weights[layer][index]

    # save the components (each column represents a component with each element the value for each object category)
    components_path = os.path.join(folder_path, folder, 'weights.mat')
    savemat(components_path, weights)

    # plot the cost function over time
    visualize.plotCost(cost)

    # visualize the components with respect to the object categories
    pl.imshow(weights[layer], interpolation='nearest')
    pl.title('Sparse Filtering Components')
    pl.xlabel('Weights')
    pl.ylabel('Filters')
    pl.xticks(np.arange(1, 10, 10))
    pl.yticks(np.arange(1, 10, 10))
    pl.show()

    # project the components back onto the cortical sheet (i.e., the dot product between each neuron's model
    # coefficients and each component)
    projections = activations
    visualize.drawplots(projections.T, color='gray', convolution=convolution,
                        pad=0, examples=None, channels=channels)

    # todo: compare the similarity of adjacent neurons of different distances and visualize
    distance_measure = 'cityblock'
    max_distance = int(cdist(np.atleast_2d([0, 0]),
                             np.atleast_2d([np.sqrt(neurons), np.sqrt(neurons)]),
                             distance_measure))
    continuity_data = np.zeros((1, max_distance))
    distances = distMat(neurons, d=neurons * 100, kind=distance_measure)
    pl.imshow(distances)
    pl.show()
    divisor = np.zeros((1, max_distance))
    for i in xrange(neurons):
        for j in xrange(neurons):
            correlation = pearsonr(coefficients[i, :].T, coefficients[j, :].T)
            d = int(distances[i, j])
            print d, correlation
            continuity_data[0, d] += correlation[0]
            divisor[0, d] += 1

    correlation_averages = continuity_data / divisor
    correlation_averages = correlation_averages[~np.isnan(correlation_averages)]
    # correlation_std = np.std(continuity_data, axis=0)
    # correlation_std = correlation_std[~np.isnan(correlation_std)] # todo: allow computation of std
    temp_std = np.linspace(.2, .1, len(correlation_averages))
    print temp_std
    print correlation_averages
    hypothetical_averages = [1., 0.7, 0.5, 0.4, 0.28, 0.21, 0.15, 0.09, 0.07, 0.05]
    hypothetical_stds = np.linspace(.07, .1, len(correlation_averages) - 1)
    fig = visualize.plot_mean_std(correlation_averages[0:10], temp_std[0:10], hypothetical_averages, hypothetical_stds)
    fig.show()
Example #11
    os.remove(directory_name + '/weights.mat')

# save the cost functions
savemat(directory_name + '/cost.mat', {'cost': cost_layer})
if aws == 'y':
    k.key = directory_name + '/cost.mat'
    k.set_contents_from_filename(directory_name + '/cost.mat')
    os.remove(directory_name + '/cost.mat')

# save the target
savemat(directory_name + '/target.mat', {'target': target})
if aws == 'y':
    k.key = directory_name + '/target.mat'
    k.set_contents_from_filename(directory_name + '/target.mat')
    os.remove(directory_name + '/target.mat')

# save the inhibition
savemat(directory_name + '/inhibition.mat', {'inhibition': a_out})
if aws == 'y':
    k.key = directory_name + '/inhibition.mat'
    k.set_contents_from_filename(directory_name + '/inhibition.mat')
    os.remove(directory_name + '/inhibition.mat')

# plot the cost
c = {'layer0': cost_layer}
visualize.plotCost(c)

# visualize the receptive fields of the first layer
visualize.drawplots(weight.T, color='gray', convolution='n',
                    pad=0, examples=None, channels=1)
Example #12
                    iteration, batch) + model + ": %f" % cost
            accuracy = float(
                np.mean(np.argmax(test_labels, axis=1) == predict(test_out)))
            print "Classification performance for model " + model + " at iteration %d: %f" % (
                iteration, accuracy)

    # save weights
    final_weights[model] = weights

# visualize the weights for each class / neuron across the cortical sheet
print "visualizing weights..."
if convolutional == 'n':
    for model in model_names:
        drawplots(final_weights[model].T,
                  color='gray',
                  convolution='n',
                  pad=0,
                  examples=None,
                  channels=1)
elif convolutional == 'y':
    for model in model_names:
        for category in xrange(train_labels.shape[1]):
            w = final_weights[model][:, category]
            w = w.reshape(int(np.sqrt(w.shape[0])), int(np.sqrt(w.shape[0])))
            pl.subplot(2, 5, category + 1)
            pl.imshow(w)

        pl.title("Weight distributions for model " + model)
        pl.show()

# find optimal neuronal positions (for N random initial positions)
print "finding optimal neuronal positions..."
Example #13
def main():
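    """Train a sparse filtering network on whitened color image patches, save the
    model and weights, and evaluate the learned filters convolutionally on the
    STL-10 folds using quadrant pooling and a linear SVM."""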

    # define global parameters
    model_type = ['SparseFilter']
    convolution = 'n'
    filename = 'unlabeled_10000.mat'
    # 'STL_10_unlabeled_patches.h5'  # _sample10'  # 'patches.mat'  # LCN  # _raw10  # _raw1000
    channels = 3
    patch_size = 14
    n_filters = 1600  # 1600
    dimensions = ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]  # [50]  # [1]
    verbosity = 2
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 1000  # 360  # 8000  # todo: figure out why large batches produce nan cost

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = h5py.File(file_path, 'r')['patches']
    data = np.array(data)
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    channel_length = patch_size**2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8
        data[start:end] = whiten(data[start:end].T).T

    data = np.float32(data.T)

    # determine number of batches
    n_batches, rem = divmod(data.shape[0], batch_size)
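    # batches are sampled at random during training, so n_batches only sets the number of updates per epoch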

    # construct the network
    print "building model..."
    model = sf.Network(model_type=model_type,
                       weight_dims=dimensions,
                       p=pool,
                       group_size=group,
                       step=step,
                       lr=learn_rate,
                       opt=opt,
                       c=convolution,
                       test=test_model,
                       batch_size=batch_size,
                       random='y',
                       weights=None)

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'wb'), pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)

    # create log file
    ex = None
    log_file = open(directory_name + "/log.txt", "wb")
    for m in range(len(model_type)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, model_type[m], dimensions[m], iterations[m]))
        if model_type[m] == 'GroupSF' or model_type[m] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (group, step))
        ex = data.shape[0]
        if examples is not None:
            ex = examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                   (filename, ex, whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()

    # get variables and save
    if verbosity >= 1:

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * batch_size
            end = begin + batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            activations_norm['layer' + str(l) + '_batch' + str(batch)] = f_hat
            activations_raw['layer' + str(l) + '_batch' + str(batch)] = f
            activations_shuffled['layer' + str(l) + '_batch' +
                                 str(batch)] = f_hat_shuffled
            reconstruction['layer' + str(l) + '_batch' + str(batch)] = err
            error_recon['layer' + str(l) + '_batch' + str(batch)] = rec
            pooled['layer' + str(l) + '_batch' + str(batch)] = p

            # save model as well as weights and activations separately
            savemat(
                directory_name + '/activations_norm_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_norm)
            savemat(
                directory_name + '/activation_raw_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_raw)

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T,
                            color='gray',
                            convolution=convolution,
                            pad=0,
                            examples=None,
                            channels=channels)

        # # visualize the distribution of lifetime and population sparseness
        # for l in xrange(len(dimensions)):
        #     layer = 'layer' + str(l)
        #     if convolution == 'n':
        #         visualize.dispSparseHist(activations_norm[layer], l)
        #     elif convolution == 'y':
        #         visualize.dispSparseHist(activations_shuffled[layer].reshape(dimensions[l][0],
        #                                                                      data.shape[0] *
        #                                                                      activations_shuffled[layer].shape[2] *
        #                                                                      activations_shuffled[layer].shape[3]),
        #                                  layer=l)
        #
        # # visualize the distribution of activity across the "cortical sheet" and reconstruction
        # visualize.drawplots(activations_norm['layer0'], color='gray', convolution=convolution,
        #                     pad=1, examples=100)
        #
        # # visualize reconstruction capabilities
        # if convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', convolution, 1)
        # elif convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])
        #
        # # additional visualizations for convolutional network
        # if convolution == 'y':
        #
        #     dim = activations_raw['layer0'].shape[2]
        #
        #     # visualize an example of a convolved image
        #     visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)
        #     # print activations_raw['layer0']
        #
        #     # visualize max-pooled activations and LCN output
        #     visualize.visualize_convolved_image(pooled['layer0'][0, :, :, :].reshape(1,
        #                                                                              pooled['layer0'].shape[1],
        #                                                                              pooled['layer0'].shape[2],
        #                                                                              pooled['layer0'].shape[3]),
        #                                         dim=dim / 2)
        #
        #     # visualize an example of a LCNed convolved image after max pooling
        #     # temp = activations_raw['layer0']    #[0, :, :, :]
        #     temp = pooled['layer0']    #[0, :, :, :]
        #     # print temp.shape
        #     for i in range(temp.shape[1]):
        #         temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape((1, 1, dim / 2, dim / 2)), kernel_shape=5)
        #     # temp = scaling.LCNinput(temp, kernel_shape=5)
        #     visualize.visualize_convolved_image(temp, dim=dim / 2)
        #     # print temp
    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if test_model == 'y':
        print 'testing...'

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        model_type = ["ConvolutionalSF"]
        dimensions = ([1, n_filters, patch_size, patch_size], )
        convolution = 'y'
        test_model = 'y'
        batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print "loading in training and test data..."
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_labels = loadmat(file_path)['y']

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)['y']

        # read in the pre-defined fold indices
        file_path = os.path.join(base_path, "data", "train.mat")
        fold_indices = loadmat(file_path)['fold_indices']
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape(
            (-1, channels, patch_size, patch_size))
        old_weights = theano.shared(
            old_weights.astype(dtype=theano.config.floatX))
        test_model = sf.Network(model_type=model_type,
                                weight_dims=dimensions,
                                p=pool,
                                group_size=group,
                                step=step,
                                lr=learn_rate,
                                opt=opt,
                                c=convolution,
                                test=test_model,
                                batch_size=batch_size,
                                random='y',
                                weights=old_weights)

        # compile the training, output, and test functions for the network
        print "compiling theano functions..."
        _, _, test = test_model.training_functions(train_data)

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None
        for layer in range(1, 2):  # range(test_model.n_layers + 1):  # skipping pixels for now

            # create dictionary for layer
            accuracy['layer' + str(layer)] = {}

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)
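            # quad_pool reduces each feature map to a 2x2 grid of quadrant activations (cf. the commented-out pooling loops below)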

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                # get fold data
                train_data_fold = np.squeeze(train_data[fold_indices[0][fold]])
                train_labels_fold = np.squeeze(
                    train_labels[fold_indices[0][fold]])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0], test_data.shape[1] *
                            test_data.shape[2] * test_data.shape[3])

                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0], train_data_fold.shape[1] *
                        train_data_fold.shape[2] * train_data_fold.shape[3])

                # hidden layers
                elif layer > 0:

                    # get the output of the current layer in the model given the training / test data and then reshape
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print "getting test data..."
                        test_input = np.zeros(
                            (test_data.shape[0], n_filters, 2, 2))
                        n_batches = test_data.shape[0] / batch_size
                        for batch in xrange(n_batches):
                            print "for batch %d" % batch
                            batch_start = batch * batch_size
                            batch_end = batch_start + batch_size
                            temp = test[layer - 1](test_data[batch_start:batch_end])  # test_data[0:batch_size]
                            temp = temp[0]
                            # for i in xrange(2):
                            #     for j in xrange(2):
                            #         pool_size = 48
                            #         i_start = i * pool_size
                            #         i_end = i_start + pool_size
                            #         j_start = j * pool_size
                            #         j_end = j_start + pool_size
                            #         test_input[batch_start:batch_end, :, i, j] = \
                            #             np.sum(
                            #                 temp[:, :, i_start:i_end, j_start:j_end],
                            #                 axis=(2, 3)
                            #         )
                            test_input[batch_start:batch_end] = quad_pool(temp)
                        test_input = test_input.reshape(
                            test_input.shape[0], test_input.shape[1] *
                            test_input.shape[2] * test_input.shape[3])

                    print "getting training data..."
                    # todo: also do quadrant pooling for training data (done) perhaps don't do batches here
                    # train_input = test[layer - 1](train_data_fold)  # test_data[0:batch_size]
                    # train_input = train_input[0].reshape(train_input[0].shape[0], train_input[0].shape[1] *
                    #                                      train_input[0].shape[2] * train_input[0].shape[3])
                    train_input = np.zeros(
                        (train_data_fold.shape[0], n_filters, 2, 2))
                    n_batches = train_data_fold.shape[0] / batch_size
                    for batch in xrange(n_batches):
                        print "for batch %d" % batch
                        batch_start = batch * batch_size
                        batch_end = batch_start + batch_size
                        temp = test[layer - 1](train_data_fold[batch_start:batch_end])
                        temp = temp[0]
                        # sum-pool the activations over the four spatial quadrants
                        train_input[batch_start:batch_end] = quad_pool(temp)
                    train_input = train_input.reshape(
                        train_input.shape[0], train_input.shape[1] *
                        train_input.shape[2] * train_input.shape[3])

                # normalize the inputs for each dimension (zero-mean and unit-variance)
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(
                    train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                predictions = clf.predict(test_input)
                accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(
                    test_input, test_labels[0:examples])

                # display results and log them
                print(
                    "Accuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer,
                       accuracy['layer' + str(layer)]['fold' + str(fold)]))
                cm = confusion_matrix(test_labels[0:examples], predictions)
                log_file = open(directory_name + "/log.txt", "a")
                log_file.write(
                    "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer,
                       accuracy['layer' + str(layer)]['fold' + str(fold)]))
                log_file.close()

            # # visualize the confusion matrix
            # if test_model == 'y' and verbosity == 2:
            #
            #     import pylab as pl
            #
            #     pl.imshow(cm, interpolation='nearest')
            #     pl.title('Confusion Matrix for Network')
            #     pl.colorbar()
            #     pl.ylabel('True Label')
            #     pl.xlabel('Predicted Label')
            #     pl.show()

        # save the test results
        savemat('accuracy', accuracy)
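
# --- Editor's note -------------------------------------------------------------
# quadrant_pooling (used above via quad_pool) is defined elsewhere in this
# repository. The sketch below is a minimal NumPy version of the same idea,
# inferred from the manual loops it replaced: sum-pool each feature map over its
# four spatial quadrants. The name quadrant_pooling_numpy and the convention that
# the argument is the full spatial size of the feature map are assumptions.

import numpy as np


def quadrant_pooling_numpy(feature_map_size):
    """Return a function that sum-pools (examples, filters, h, w) activations into
    (examples, filters, 2, 2) by summing each of the four spatial quadrants."""

    half = feature_map_size // 2

    def pool(activations):
        n_examples, n_maps = activations.shape[0], activations.shape[1]
        pooled = np.zeros((n_examples, n_maps, 2, 2), dtype=activations.dtype)
        for i in xrange(2):
            for j in xrange(2):
                pooled[:, :, i, j] = activations[:, :, i * half:(i + 1) * half,
                                                 j * half:(j + 1) * half].sum(axis=(2, 3))
        return pooled

    return pool
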
Beispiel #14
def main():
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1
        -------------------------------------------------------------------------------------------------------------
        ''')
    )
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type")
    parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename")
    parser.add_argument("-d", "--dimensions", type=int, nargs='+', default=([100, 256]),
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None, help="group size")
    parser.add_argument("-s", "--step", type=int, default=None, help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'")
    parser.add_argument("-r", "--random", default='n', help="type of batches: random = 'y'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)
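    # parse_dims / parse_iter are defined elsewhere in this repository; presumably they group
    # the flat -d / -i argument lists into per-layer entries (e.g. "-d 16 1 6 6 16 16 4 4"
    # becoming one 4-tuple per convolutional layer), as the example usages above suggest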

    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data.T).T
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        # data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :].
                                                                     reshape((data.shape[0], 1,
                                                                              data.shape[2],
                                                                              data.shape[3])),
                                                                     kernel_shape=9), (
                                                    data.shape[0],
                                                    data.shape[2],
                                                    data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None

    # assert that the number of neurons in each layer is a perfect square
    for layer in xrange(len(args.dimensions)):
        assert np.sqrt(args.dimensions[layer][0]) % np.floor(np.sqrt(args.dimensions[layer][0])) == 0

    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=args.model, weight_dims=args.dimensions, p=args.pool, group_size=args.group,
        step=args.step, lr=args.learn_rate, opt=args.opt, c=args.convolution, test=args.test,
        batch_size=args.batch_size, random=args.random, weights=None
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)
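    # train / outputs / test are per-layer lists of compiled Theano functions,
    # indexed as train[l], outputs[l], and test[l] below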

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l], 'disp': True})
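            # note: with jac=True, scipy.optimize.minimize expects train[l] to return a
            # (cost, gradient) tuple, with the gradient flattened to match the weight vector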

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time        
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'wb'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if args.aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")  # todo: create log file by looping through args
    # for arg in args:
    #     log_file.write(
    #         args.
    #     )
    for m in range(len(args.model)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m,
                                                                                    args.model[m],
                                                                                    args.dimensions[m],
                                                                                    args.iterations[m])
        )
        if args.model[m] == 'GroupSF' or args.model[m] == 'GroupConvolutionalSF':
            log_file.write(
                " Groups: %d \n Step: %d" % (args.group, args.step)
            )
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples

    log_file.write(
        " Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening)
    )
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    ''' =============================== Verbosity Options ===================================== '''

    # get variables of interest and save them
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}

        # for l in xrange(len(args.dimensions)):

            # activations_norm['layer' + str(l)] = {}
            # activations_raw['layer' + str(l)] = {}
            # activations_shuffled['layer' + str(l)] = {}
            # reconstruction['layer' + str(l)] = {}
            # error_recon['layer' + str(l)] = {}
            # pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
            # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
            # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
            # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
            # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
            # pooled['layer' + str(l)]['batch' + str(batch)] = p

            # define [mini]batch title (activations come from the top layer of the network)
            batch_title = 'layer' + str(model.n_layers - 1) + '_batch' + '%03d' % batch

            # define norm and raw file names
            norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat'
            raw_file_name = directory_name + '/activation_raw_' + batch_title + '.mat'

            activations_norm[batch_title] = f_hat
            activations_raw[batch_title] = f
            activations_shuffled[batch_title] = f_hat_shuffled
            reconstruction[batch_title] = err
            error_recon[batch_title] = rec
            pooled[batch_title] = p

            # save model as well as weights and activations separately
            savemat(norm_file_name, activations_norm)
            # savemat(raw_file_name, activations_raw)

            if args.aws == 'y':

                k.key = norm_file_name
                k.set_contents_from_filename(norm_file_name)
                os.remove(norm_file_name)

                # k.key = raw_file_name
                # k.set_contents_from_filename(raw_file_name)
                # os.remove(raw_file_name)

        # savemat(directory_name + '/weights.mat', weights)
        # if args.aws == 'y':
        #     k.key = directory_name + '/weights.mat'
        #     k.set_contents_from_filename(directory_name + '/weights.mat')
        #     os.remove(directory_name + '/weights.mat')

        #     # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        #     f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
        #
        #     activations_norm['layer' + str(l)] = f_hat
        #     activations_raw['layer' + str(l)] = f
        #     activations_shuffled['layer' + str(l)] = f_hat_shuffled
        #     reconstruction['layer' + str(l)] = err
        #     error_recon['layer' + str(l)] = rec
        #     pooled['layer' + str(l)] = p
        #
        # # save model as well as weights and activations separately
        # savemat(directory_name + '/weights.mat', weights)
        # savemat(directory_name + '/activations_norm.mat', activations_norm)
        # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # output helper file for concatenating activations (only available when they were computed above)
    if args.verbosity >= 1:
        helper = {'batches': n_batches, 'output_size': f_hat.shape}
        helper_file_name = directory_name + '/helper.mat'
        savemat(helper_file_name, helper)
        if args.aws == 'y':
            k.key = helper_file_name
            k.set_contents_from_filename(helper_file_name)
            os.remove(helper_file_name)

    # get data if not on AWS
    if args.aws == 'n':
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data)
        activations_norm = {"layer0": f_hat}

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution,
                            pad=0, examples=None, channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in xrange(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(args.dimensions[l][0],
                                                                             data.shape[0] *
                                                                             activations_shuffled[layer].shape[2] *
                                                                             activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution,
                                pad=1, examples=100)

        # # visualize reconstruction capabilities
        # if args.convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
        # elif args.convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=args.convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)
            # print activations_raw['layer0']

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(pooled['layer0'][0, :, :, :].reshape(1,
                                                                                     pooled['layer0'].shape[1],
                                                                                     pooled['layer0'].shape[2],
                                                                                     pooled['layer0'].shape[3]),
                                                dim=dim / 2)

            # visualize an example of a LCNed convolved image after max pooling
            # temp = activations_raw['layer0']    #[0, :, :, :]
            temp = pooled['layer0']    #[0, :, :, :]
            # print temp.shape
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape((1, 1, dim / 2, dim / 2)), kernel_shape=5)
            # temp = scaling.LCNinput(temp, kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=dim / 2)
            # print temp

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            test_data = np.float32(test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                                     int(np.sqrt(test_data.shape[1]))))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                               test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(data.shape[0], data.shape[1] *
                                           data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(test_input[0].shape[0], test_input[0].shape[1] *
                                                   test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                                  train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write(
                "\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)
            )
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
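
# --- Editor's note -------------------------------------------------------------
# scaling.LCNinput is this repository's own pre-processing helper. The sketch below
# illustrates LeCun-style local contrast normalization (subtract a Gaussian-weighted
# local mean, then divide by the local standard deviation); lcn_sketch and its
# sigma / eps parameters are assumptions and may differ from the actual
# implementation, which takes a kernel_shape argument instead.

import numpy as np
from scipy.ndimage import gaussian_filter


def lcn_sketch(images, sigma=2.0, eps=1e-4):
    """Locally contrast-normalize images of shape (examples, channels, height, width)."""
    out = np.empty_like(images, dtype=np.float32)
    for n in xrange(images.shape[0]):
        for c in xrange(images.shape[1]):
            x = images[n, c].astype(np.float32)
            centered = x - gaussian_filter(x, sigma)            # subtractive normalization
            local_std = np.sqrt(gaussian_filter(centered ** 2, sigma))
            divisor = np.maximum(local_std, local_std.mean())   # avoid amplifying flat regions
            out[n, c] = centered / np.maximum(divisor, eps)     # divisive normalization
    return out
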
Beispiel #15
def main():

    # define global parameters
    model_type = ["SparseFilter"]
    convolution = "n"
    filename = "unlabeled_10000.mat"
    # 'STL_10_unlabeled_patches.h5'  # _sample10'  # 'patches.mat'  # LCN  # _raw10  # _raw1000
    channels = 3
    patch_size = 14
    n_filters = 1600  # 1600
    dimensions = ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]  # [50]  # [1]
    verbosity = 2
    opt = "GD"
    whitening = "y"
    test_model = "y"
    examples = None
    batch_size = 1000  # 360  # 8000  # todo: figure out why large batches produce nan cost

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = h5py.File(file_path, "r")["patches"]
    data = np.array(data)
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    channel_length = patch_size ** 2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8
        data[start:end] = whiten(data[start:end].T).T

    data = np.float32(data.T)

    # determine number of batches
    n_batches, rem = divmod(data.shape[0], batch_size)

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=model_type,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test_model,
        batch_size=batch_size,
        random="y",
        weights=None,
    )

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print ("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost["layer" + str(l)] = cost_layer
        weights["layer" + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print ("Elapsed training time: %f" % elapsed)

    # create sub-folder for saved model
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + "/model.pkl"
    pickle.dump(model, open(full_path, "w"), pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + "/weights.mat", weights)

    # create log file
    ex = None
    log_file = open(directory_name + "/log.txt", "wb")
    for m in range(len(model_type)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, model_type[m], dimensions[m], iterations[m])
        )
        if model == "GroupSF" or model == "GroupConvolutionalSF":
            log_file.write(" Groups: %d \n Step: %d" % (group, step))
        ex = data.shape[0]
        if examples is not None:
            ex = examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening))
    log_file.write("\nElapsed training time: %f" % elapsed)
    log_file.close()

    # get variables and save
    if verbosity >= 1:

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * batch_size
            end = begin + batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end])

            activations_norm["layer" + str(l) + "_batch" + str(batch)] = f_hat
            activations_raw["layer" + str(l) + "_batch" + str(batch)] = f
            activations_shuffled["layer" + str(l) + "_batch" + str(batch)] = f_hat_shuffled
            reconstruction["layer" + str(l) + "_batch" + str(batch)] = err
            error_recon["layer" + str(l) + "_batch" + str(batch)] = rec
            pooled["layer" + str(l) + "_batch" + str(batch)] = p

            # save model as well as weights and activations separately
            savemat(
                directory_name + "/activations_norm_" + "layer" + str(l) + "_batch" + str(batch) + ".mat",
                activations_norm,
            )
            savemat(
                directory_name + "/activation_raw_" + "layer" + str(l) + "_batch" + str(batch) + ".mat", activations_raw
            )

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == "GD":
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(
            weights["layer0"].T, color="gray", convolution=convolution, pad=0, examples=None, channels=channels
        )

        # (further sparseness, reconstruction, and convolutional visualizations, identical to
        # those in the previous example, are omitted here)

    """ ================================ Test the Model ======================================= """

    # test the model if evaluating classification performance
    if test_model == "y":
        print "testing..."

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        model_type = ["ConvolutionalSF"]
        dimensions = ([1, n_filters, patch_size, patch_size],)
        convolution = "y"
        test_model = "y"
        batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print "loading in training and test data..."
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)["X"]
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_labels = loadmat(file_path)["y"]

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)["X"]
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)["y"]

        # read in the pre-defined fold indices
        file_path = os.path.join(base_path, "data", "train.mat")
        fold_indices = loadmat(file_path)["fold_indices"]
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape((-1, channels, patch_size, patch_size))
        old_weights = theano.shared(old_weights.astype(dtype=theano.config.floatX))
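        # note: the sf.Network call below still receives the 'y' flag via test=test_model;
        # afterwards, test_model is re-bound to the resulting convolutional network object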
        test_model = sf.Network(
            model_type=model_type,
            weight_dims=dimensions,
            p=pool,
            group_size=group,
            step=step,
            lr=learn_rate,
            opt=opt,
            c=convolution,
            test=test_model,
            batch_size=batch_size,
            random="y",
            weights=old_weights,
        )

        # compile the training, output, and test functions for the network
        print "compiling theano functions..."
        _, _, test = test_model.training_functions(train_data)

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None
        for layer in range(1, 2):  # range(test_model.n_layers + 1):  # skipping pixels for now

            # create dictionary for layer
            accuracy["layer" + str(layer)] = {}

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)
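            # quad_pool sum-pools each feature map into a 2x2 grid of spatial quadrants
            # (see the NumPy quadrant-pooling sketch earlier in this file)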

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                # get fold data
                train_data_fold = np.squeeze(train_data[fold_indices[0][fold]])
                train_labels_fold = np.squeeze(train_labels[fold_indices[0][fold]])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]
                        )

                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0],
                        train_data_fold.shape[1] * train_data_fold.shape[2] * train_data_fold.shape[3],
                    )

                # hidden layers
                elif layer > 0:

                    # get the output of the current layer in the model given the training / test data and then reshape
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print "getting test data..."
                        test_input = np.zeros((test_data.shape[0], n_filters, 2, 2))
                        n_batches = test_data.shape[0] / batch_size
                        for batch in xrange(n_batches):
                            print "for batch %d" % batch
                            batch_start = batch * batch_size
                            batch_end = batch_start + batch_size
                            temp = test[layer - 1](test_data[batch_start:batch_end])  # test_data[0:batch_size]
                            temp = temp[0]
                            # sum-pool the activations over the four spatial quadrants
                            test_input[batch_start:batch_end] = quad_pool(temp)
                        test_input = test_input.reshape(
                            test_input.shape[0], test_input.shape[1] * test_input.shape[2] * test_input.shape[3]
                        )

                    print "getting training data..."
                    # todo: also do quadrant pooling for training data (done) perhaps don't do batches here
                    # train_input = test[layer - 1](train_data_fold)  # test_data[0:batch_size]
                    # train_input = train_input[0].reshape(train_input[0].shape[0], train_input[0].shape[1] *
                    #                                      train_input[0].shape[2] * train_input[0].shape[3])
                    train_input = np.zeros((train_data_fold.shape[0], n_filters, 2, 2))
                    n_batches = train_data_fold.shape[0] / batch_size
                    for batch in xrange(n_batches):
                        print "for batch %d" % batch
                        batch_start = batch * batch_size
                        batch_end = batch_start + batch_size
                        temp = test[layer - 1](train_data_fold[batch_start:batch_end])  # test_data[0:batch_size]
                        temp = temp[0]
                        # sum-pool the activations over the four spatial quadrants
                        train_input[batch_start:batch_end] = quad_pool(temp)
                    train_input = train_input.reshape(
                        train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]
                    )

                # normalize the inputs for each dimension (zero-mean and unit-variance)
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print ("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print ("Making predictions...")
                predictions = clf.predict(test_input)
                accuracy["layer" + str(layer)]["fold" + str(fold)] = clf.score(test_input, test_labels[0:examples])

                # display results and log them
                print (
                    "Accuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer, accuracy["layer" + str(layer)]["fold" + str(fold)])
                )
                cm = confusion_matrix(test_labels[0:examples], predictions)
                log_file = open(directory_name + "/log.txt", "a")
                log_file.write(
                    "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer, accuracy["layer" + str(layer)]["fold" + str(fold)])
                )
                log_file.close()

            # # visualize the confusion matrix
            # if test_model == 'y' and verbosity == 2:
            #
            #     import pylab as pl
            #
            #     pl.imshow(cm, interpolation='nearest')
            #     pl.title('Confusion Matrix for Network')
            #     pl.colorbar()
            #     pl.ylabel('True Label')
            #     pl.xlabel('Predicted Label')
            #     pl.show()

        # save the test results
        savemat("accuracy", accuracy)
def main():

    # define global parameters
    filename = 'patches.mat'
    n_filters = 100
    learn_rate = 0.001
    iterations = [200]

    # load in data and preprocess
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    data -= data.mean(axis=0)
    data = np.float32(data.T)

    # construct the network
    print "building model..."
    weights = tf.Variable(tf.random_uniform([n_filters, data.shape[1]]))
    model = SparseFilter(weights, data)

    # define loss, optimizer, and train function
    loss = tf.reduce_sum(model.feed_forward())
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train = optimizer.minimize(loss)

    # initialize all the variables
    init = tf.initialize_all_variables()

    # run the session
    sess = tf.Session()
    sess.run(init)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost_running = []

    # iterate over training epochs
    for epoch in xrange(iterations[0]):

        sess.run(train)
        current_cost = sess.run(loss)
        cost_running.append(current_cost)
        print("Cost at epoch %i: %0.4f" % (epoch, current_cost))

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # plot the cost function over time
    c = {'layer0': cost_running}
    visualize.plotCost(c)

    # visualize the receptive fields of the first layer
    weights_final = sess.run(weights)
    print weights_final.shape
    visualize.drawplots(weights_final.T,
                        color='gray',
                        convolution='n',
                        pad=0,
                        examples=None,
                        channels=1)
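
# --- Editor's note -------------------------------------------------------------
# The SparseFilter class used in the TensorFlow example above is defined elsewhere in
# this repository. Its feed_forward() presumably computes the standard sparse
# filtering objective (Ngiam et al., 2011): a soft absolute value of the filter
# responses followed by L2 normalization per feature and then per example, with the
# loss being the sum of the normalized responses. The NumPy sketch below illustrates
# that objective; the function name, the eps constant, and the assumed shapes
# (weights: (n_filters, n_inputs), data: (n_inputs, n_examples)) are illustrative.

import numpy as np


def sparse_filtering_objective(weights, data, eps=1e-8):
    """Return the sparse filtering cost for the given weights and data."""
    f = np.sqrt(np.dot(weights, data) ** 2 + eps)                     # soft absolute value of responses
    f = f / np.sqrt(np.sum(f ** 2, axis=1, keepdims=True) + eps)      # normalize each feature across examples
    f = f / np.sqrt(np.sum(f ** 2, axis=0, keepdims=True) + eps)      # normalize each example across features
    return np.sum(f)                                                  # L1 penalty on the normalized responses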