Example #1
def main():

    # define global parameters
    model_type = [
        'ConvolutionalSF',
        'ConvolutionalSF',
        'ConvolutionalSF',
        'ConvolutionalSF',
        # 'ConvolutionalSF'
    ]
    convolution = 'y'
    filename = "unlabeled_10000.mat"  # train  # unlabeled  # STL_10_lcn_unlabeled.mat.h5
    channels = 3
    patch_size = 14
    n_filters = [
        100,
        400,
        1600,
        6400
        # 800,
        # 1600
    ]  #
    # [100, 400, 1600, 6400, 25600]  # 1600  # increasing neurons x4 maintains dimensionality
    dimensions = (
        [n_filters[0], channels, 11, 11],
        [n_filters[1], n_filters[0], 4, 4],
        [n_filters[2], n_filters[1], 3, 3],
        [n_filters[3], n_filters[2], 2, 2],
        # [n_filters[4], n_filters[3], 3, 3]
    )
    # ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.001  # 0.0001
    iterations = [
        3,
        3,
        2,
        2
        # 1,
        # 1
    ]  # [5, 5, 5]  # [50]  # [100]
    verbosity = 0
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 100  # 360  # 8000
    lcn_kernel = [
        5,
        5,
        3,
        3
    ]  # these may have to be odd values so that there is a middle
    aws = 'y'
    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = None
    if filename == 'train.mat' or filename == 'unlabeled_10000.mat':
        data = loadmat(file_path)['X']
    elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5':
        data = h5py.File(file_path, 'r')['X']
        data = np.array(data)
        data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    print "pre-processing data..."
    data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
    data = data[0:examples, :, :, :]
    print data.shape
    if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat':
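        # Each color channel is locally contrast-normalized (LCN) independently with a
        # 9x9 neighborhood. scaling.LCNinput (external helper, assumed here) takes a 4-D
        # (examples, 1, height, width) array, so each channel is reshaped to that layout
        # and the normalized result is reshaped back to (examples, height, width).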
        for channel in range(channels):
            data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :].
                                                                 reshape((data.shape[0], 1,
                                                                          data.shape[2],
                                                                          data.shape[3])),
                                                                 kernel_shape=9), (
                                                data.shape[0],
                                                data.shape[2],
                                                data.shape[3])
                                                )
    #
    # # determine number of batches
    # n_batches, rem = divmod(data.shape[0], batch_size)
    #
    # # construct the network
    # print "building model..."
    # model = sf.Network(
    #     model_type=model_type,
    #     weight_dims=dimensions,
    #     p=pool,
    #     group_size=group,
    #     step=step,
    #     lr=learn_rate,
    #     opt=opt,
    #     c=convolution,
    #     test=test_model,
    #     batch_size=batch_size,
    #     random='y',
    #     weights=None,
    #     lcn_kernel=lcn_kernel
    # )
    #
    # # compile the training, output, and test functions for the network
    # print "compiling theano functions..."
    # train, outputs, test = model.training_functions(data)
    #
    # # train the sparse filtering network
    # print "training network..."
    # start_time = time.time()
    # cost = {}
    # weights = {}
    # for l in xrange(model.n_layers):
    #
    #     cost_layer = []
    #     w = None
    #
    #     # iterate over training epochs
    #     for epoch in xrange(iterations[l]):
    #
    #         # go though [mini]batches
    #         for batch_index in xrange(n_batches):
    #
    #             # create index for random [mini]batch
    #             index = np.int32(np.random.randint(data.shape[0], size=batch_size))
    #
    #             c, w = train[l](index=index)
    #             cost_layer.append(c)
    #             print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))
    #
    #     # add layer cost and weights to the dictionaries
    #     cost['layer' + str(l)] = cost_layer
    #     weights['layer' + str(l)] = w
    #
    # # calculate and display elapsed training time
    # elapsed = time.time() - start_time
    # print('Elapsed training time: %f' % elapsed)
    #
    # create sub-folder for saved model
    directory_name = None
    if aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        # directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        # directory_name = directory_format % time.localtime()[0:6]

        directory_name = "./saved/2016-01-25_19h17m41s"

        # os.mkdir(directory_name)
    #
    # # save the model for later use
    # full_path = directory_name + '/model.pkl'
    # pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
    # if aws == 'y':
    #     k.key = full_path
    #     k.set_contents_from_filename(full_path)
    #     os.remove(full_path)
    #
    # # save weights separately
    # savemat(directory_name + '/weights.mat', weights)
    # if aws == 'y':
    #     k.key = directory_name + '/weights.mat'
    #     k.set_contents_from_filename(directory_name + '/weights.mat')
    #     os.remove(directory_name + '/weights.mat')
    #
    # # save the cost functions
    # savemat(directory_name + '/cost.mat', cost)
    # if aws == 'y':
    #     k.key = directory_name + '/cost.mat'
    #     k.set_contents_from_filename(directory_name + '/cost.mat')
    #     os.remove(directory_name + '/cost.mat')
    #
    # # create log file
    # log_file = open(directory_name + "/log.txt", "wb")  # todo: create log file by looping through args
    # for m in range(len(model_type)):
    #     log_file.write(
    #         "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m,
    #                                                                                 model_type[m],
    #                                                                                 dimensions[m],
    #                                                                                 iterations[m])
    #     )
    #     if model == 'GroupSF' or model == 'GroupConvolutionalSF':
    #         log_file.write(
    #             " Groups: %d \n Step: %d" % (group, step)
    #         )
    #     ex = data.shape[0]
    #     if examples is not None:
    #         ex = examples
    #
    # log_file.write(
    #     " Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening)
    # )
    # log_file.write('\nElapsed training time: %f' % elapsed)
    # log_file.close()
    # if aws == 'y':
    #     k.key = directory_name + "/log.txt"
    #     k.set_contents_from_filename(directory_name + "/log.txt")
    #     os.remove(directory_name + "/log.txt")

    ''' ================================ Test the Model ======================================= '''

    # todo: train a model and save it; then load in the model and test it so that grid search can be performed

    # load in the model
    if aws == 'y':
        k.key = directory_name + '/model.pkl'
        # model = k.read(k.key)
        model = pickle.loads(k.get_contents_as_string())  # open(model, 'rb')

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # test the model if evaluating classification performance
    if test_model == 'y':
        print 'testing...'

        from sklearn import svm

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"  # "train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"  # "test.mat"
        test_labels_file = "test.mat"
        batch_size = 100

        # todo: read in lcn data
        # load in STL-10 training data (all pre-normalized using LCN)
        print "loading in training and test data..."
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_labels = loadmat(file_path)['y']

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)['y']

        # # preproces training and test data
        # print "preprocessing training and test data..."
        # print train_data.shape

        # train_data = np.float32(train_data.reshape(-1,
        #                                            3,
        #                                            int(np.sqrt(train_data.shape[1] / 3)),
        #                                            int(np.sqrt(train_data.shape[1] / 3)))
        #                         )

        # print train_data.shape
        # for channel in range(channels):
        #     train_data[:, channel, :, :] = np.reshape(scaling.LCNinput(train_data[:, channel, :, :].
        #                                                                reshape((train_data.shape[0], 1,
        #                                                                         train_data.shape[2],
        #                                                                         train_data.shape[3])),
        #                                                                kernel_shape=9), (
        #                                               train_data.shape[0],
        #                                               train_data.shape[2],
        #                                               train_data.shape[3]))
        #
        # test_data = np.float32(test_data.reshape(-1,
        #                                          3,
        #                                          int(np.sqrt(test_data.shape[1] / 3)),
        #                                          int(np.sqrt(test_data.shape[1] / 3)))
        #                         )
        # for channel in range(channels):
        #     test_data[:, channel, :, :] = np.reshape(scaling.LCNinput(test_data[:, channel, :, :].
        #                                                               reshape((test_data.shape[0], 1,
        #                                                                        test_data.shape[2],
        #                                                                        test_data.shape[3])),
        #                                                               kernel_shape=9), (
        #                                              test_data.shape[0],
        #                                              test_data.shape[2],
        #                                              test_data.shape[3]))

        # read in the pre-defined fold indices
        file_path = os.path.join(base_path, "data", "train.mat")
        fold_indices = loadmat(file_path)['fold_indices']
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index
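        # fold_indices is assumed to come from the standard STL-10 train.mat: a (1, n_folds)
        # array of 1-based (MATLAB) example indices per fold, hence the subtraction above to
        # make them 0-based for numpy indexing.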

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None
        c_parameters = [0.02, 0.005, 0.002, 0.001]
        for layer in range(1, model.n_layers + 1):  # range(test_model.n_layers + 1):  # skipping pixels for now

            # create dictionary for layer and list for calculations
            accuracy['layer' + str(layer)] = {}
            accuracy_list = []

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0, :, :, :].reshape((1, 3, 96, 96)))[0].shape[3] / 2
            print quadrant_size
            quad_pool = quadrant_pooling(quadrant_size)
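            # quadrant_pooling is an external helper (not shown in this listing); judging by how
            # quad_pool is used below, it reduces each (batch, maps, 2*q, 2*q) feature map to
            # (batch, maps, 2, 2) by pooling within each spatial quadrant, returning a tuple whose
            # first element is the pooled array. A rough numpy sketch of that assumed behaviour
            # (sum-pooling assumed; it may equally be max/mean):
            #
            #   def quadrant_pool_np(x, q):
            #       out = np.zeros((x.shape[0], x.shape[1], 2, 2), dtype=x.dtype)
            #       for i in range(2):
            #           for j in range(2):
            #               out[:, :, i, j] = x[:, :, i * q:(i + 1) * q,
            #                                   j * q:(j + 1) * q].sum(axis=(2, 3))
            #       return out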

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                # get fold data
                fold_index = fold_indices[0][fold].astype('int')
                train_data_fold = np.squeeze(train_data[fold_index])
                train_labels_fold = np.squeeze(train_labels[fold_index])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                                       test_data.shape[2] * test_data.shape[3])

                    train_input = train_data_fold.reshape(train_data_fold.shape[0], train_data_fold.shape[1] *
                                                          train_data_fold.shape[2] * train_data_fold.shape[3])

                # hidden layers
                elif layer > 0:

                    # get the output of the current layer in the model given the training / test data and then reshape
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print "getting test data..."
                        test_input = np.zeros((test_data.shape[0], n_filters[layer - 1], 2, 2))
                        n_batches = test_data.shape[0] / batch_size
                        for batch in xrange(n_batches):
                            print "for batch %d" % batch
                            batch_start = batch * batch_size
                            batch_end = batch_start + batch_size
                            temp = test[layer - 1](test_data[batch_start:batch_end])
                            temp = temp[0]
                            test_input[batch_start:batch_end] = quad_pool(temp)[0]
                        test_input = test_input.reshape(test_input.shape[0], test_input.shape[1] *
                                                        test_input.shape[2] * test_input.shape[3])

                    print "getting training data..."
                    train_input = np.zeros((train_data_fold.shape[0], n_filters[layer - 1], 2, 2))
                    n_batches = train_data_fold.shape[0] / batch_size
                    for batch in xrange(n_batches):
                        print "for batch %d" % batch
                        batch_start = batch * batch_size
                        batch_end = batch_start + batch_size
                        temp = test[layer - 1](train_data_fold[batch_start:batch_end])
                        temp = temp[0]
                        train_input[batch_start:batch_end] = quad_pool(temp)[0]
                    train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                                      train_input.shape[2] * train_input.shape[3])

                # normalize the inputs for each dimension (zero-mean and unit-variance)
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(C=c_parameters[layer - 1], kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(test_input, test_labels[0:examples])
                accuracy_list.append(accuracy['layer' + str(layer)]['fold' + str(fold)])
                training_accuracy = clf.score(train_input, np.ravel(train_labels_fold[0:examples]))

                # display results and log them
                print("Accuracy of the classifier for fold %d at layer %1d: %0.4f" %
                      (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]))
                print "classification performance on training set: %0.4f" % training_accuracy

                log_file = open(directory_name + "/log_test.txt", "a")
                log_file.write(
                    "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" %
                    (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)])
                )
                log_file.close()

            # calculate and print out average accuracy and std
            avg = np.mean(accuracy_list)
            std = np.std(accuracy_list)
            print "The overall accuracy of layer %d: %0.4f +/- (%0.4f)" % (layer, float(avg), float(std))
            log_file = open(directory_name + "/log_test.txt", "a")
            log_file.write(
                "\nOverall accuracy of the classifier at layer %1d: %0.4f +/- (%0.4f)" %
                (layer, float(avg), float(std))
            )
            log_file.close()

        # save for aws
        if aws == 'y':
            k.key = directory_name + "/log_test.txt"
            k.set_contents_from_filename(directory_name + "/log_test.txt")

        # save the test results
        savemat('accuracy', accuracy)
Example #2
def main():
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
              -i 100 150 -t y -v 1

        In the convolutional case, the extra "1" is added automatically for broadcasting.
        -------------------------------------------------------------------------------------------------------------
        '''))
    parser.add_argument("-m",
                        "--model",
                        default=['SparseFilter'],
                        nargs='+',
                        help="the model type")
    parser.add_argument("-c",
                        "--convolution",
                        default="n",
                        help="convolution, yes or no")
    parser.add_argument("-f",
                        "--filename",
                        default="patches.mat",
                        help="the data filename")
    parser.add_argument(
        "-d",
        "--dimensions",
        type=int,
        nargs='+',
        default=([100, 256]),
        help=
        "the dimensions of the model: [neurons, input size] or [neurons, length, width]"
    )
    parser.add_argument("-p",
                        "--pool",
                        type=int,
                        nargs='+',
                        default=None,
                        help="pooling dimensions")
    parser.add_argument("-g",
                        "--group",
                        type=int,
                        default=None,
                        help="group size")
    parser.add_argument("-s",
                        "--step",
                        type=int,
                        default=None,
                        help="step size")
    parser.add_argument("-l",
                        "--learn_rate",
                        type=float,
                        default=.001,
                        help="learning rate")
    parser.add_argument("-i",
                        "--iterations",
                        type=int,
                        nargs='+',
                        default=[100],
                        help="number of iterations")
    parser.add_argument("-v",
                        "--verbosity",
                        type=int,
                        default=0,
                        help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o",
                        "--opt",
                        default="GD",
                        help="optimization method: GD or L-BFGS")
    parser.add_argument("-w",
                        "--whitening",
                        default='n',
                        help="whitening: 'y' or 'n'")
    parser.add_argument("-t",
                        "--test",
                        default='n',
                        help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a",
                        "--channels",
                        type=int,
                        default=1,
                        help="number of channels in data")
    parser.add_argument("-e",
                        "--examples",
                        type=int,
                        default=None,
                        help="number of training examples")
    parser.add_argument("-b",
                        "--batch_size",
                        type=int,
                        default=1000,
                        help="number of examples in [mini]batch")
    parser.add_argument("-z",
                        "--aws",
                        default='n',
                        help="run on aws: 'y' or 'n'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)
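    # parse_dims / parse_iter are helpers defined elsewhere in this module (not shown here);
    # they are assumed to regroup the flat command-line lists into per-layer specifications,
    # e.g. "-d 16 1 6 6 16 16 4 4" with two models presumably becomes
    # ([16, 1, 6, 6], [16, 16, 4, 4]) and "-i 100 150" becomes [100, 150],
    # matching the usage examples in the parser description above.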
    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data)
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        # data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':

        if args.filename == 'kyotoData.mat':
            data = np.float32(
                data.reshape(-1, 1, int(np.sqrt(data.shape[1])),
                             int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(
                data.reshape(-1, 1, int(np.sqrt(data.shape[1])),
                             int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(
                data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)),
                             int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(data[:, channel, :, :].reshape(
                        (data.shape[0], 1, data.shape[2], data.shape[3])),
                                     kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None
    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(model_type=args.model,
                       weight_dims=args.dimensions,
                       p=args.pool,
                       group_size=args.group,
                       step=args.step,
                       lr=args.learn_rate,
                       opt=args.opt,
                       c=args.convolution,
                       test=args.test,
                       batch_size=args.batch_size
                       )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):

                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" %
                          (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l],
                         model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B',
                         jac=True,
                         options={
                             'maxiter': args.iterations[l],
                             'disp': True
                         })
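            # scipy.optimize.minimize with jac=True expects the objective to return a
            # (cost, gradient) pair, so this path assumes train[l] follows that convention
            # and operates on the flattened weight vector passed as the initial guess.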

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")
    for m in range(len(args.model)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, args.model[m], args.dimensions[m], args.iterations[m]))
        if args.model[m] == 'GroupSF' or args.model[m] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (args.group, args.step))
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                   (args.filename, ex, args.whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")
    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}

        # for l in xrange(len(args.dimensions)):

        # activations_norm['layer' + str(l)] = {}
        # activations_raw['layer' + str(l)] = {}
        # activations_shuffled['layer' + str(l)] = {}
        # reconstruction['layer' + str(l)] = {}
        # error_recon['layer' + str(l)] = {}
        # pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers -
                                                            1](data[begin:end])
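            # each outputs[...] function is assumed to return, in order: the normalized
            # activations (f_hat), reconstruction-related terms (rec, err), the normalized
            # activations reshuffled for convolution (f_hat_shuffled), the raw activations (f),
            # and the pooled output (p) -- names inferred from how they are stored below.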

            # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat
            # activations_raw['layer' + str(l)]['batch' + str(batch)] = f
            # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled
            # reconstruction['layer' + str(l)]['batch' + str(batch)] = err
            # error_recon['layer' + str(l)]['batch' + str(batch)] = rec
            # pooled['layer' + str(l)]['batch' + str(batch)] = p

            activations_norm['layer' + str(l) + '_batch' + str(batch)] = f_hat
            activations_raw['layer' + str(l) + '_batch' + str(batch)] = f
            activations_shuffled['layer' + str(l) + '_batch' +
                                 str(batch)] = f_hat_shuffled
            reconstruction['layer' + str(l) + '_batch' + str(batch)] = err
            error_recon['layer' + str(l) + '_batch' + str(batch)] = rec
            pooled['layer' + str(l) + '_batch' + str(batch)] = p
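            # NOTE: `l` here is the loop variable left over from the training loop above
            # (i.e. the index of the last layer), and the dictionary keys have the form
            # 'layer{l}_batch{batch}'. The visualization and test sections further down index
            # these dictionaries with plain 'layer0'-style keys (as in the commented-out variant
            # below), so they only line up if that older key scheme is used instead.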

            # save model as well as weights and activations separately
            savemat(
                directory_name + '/activations_norm_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_norm)
            savemat(
                directory_name + '/activation_raw_' + 'layer' + str(l) +
                '_batch' + str(batch) + '.mat', activations_raw)

            if args.aws == 'y':

                k.key = directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + \
                    str(batch) + '.mat'
                k.set_contents_from_filename(directory_name +
                                             '/activations_norm_' + 'layer' +
                                             str(l) + '_batch' + str(batch) +
                                             '.mat')
                os.remove(directory_name + '/activations_norm_' + 'layer' +
                          str(l) + '_batch' + str(batch) + '.mat')

                k.key = directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + \
                    str(batch) + '.mat'
                k.set_contents_from_filename(directory_name +
                                             '/activation_raw_' + 'layer' +
                                             str(l) + '_batch' + str(batch) +
                                             '.mat')
                os.remove(directory_name + '/activation_raw_' + 'layer' +
                          str(l) + '_batch' + str(batch) + '.mat')

        # savemat(directory_name + '/weights.mat', weights)
        # if args.aws == 'y':
        #     k.key = directory_name + '/weights.mat'
        #     k.set_contents_from_filename(directory_name + '/weights.mat')
        #     os.remove(directory_name + '/weights.mat')

        #     # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        #     f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size])
        #
        #     activations_norm['layer' + str(l)] = f_hat
        #     activations_raw['layer' + str(l)] = f
        #     activations_shuffled['layer' + str(l)] = f_hat_shuffled
        #     reconstruction['layer' + str(l)] = err
        #     error_recon['layer' + str(l)] = rec
        #     pooled['layer' + str(l)] = p
        #
        # # save model as well as weights and activations separately
        # savemat(directory_name + '/weights.mat', weights)
        # savemat(directory_name + '/activations_norm.mat', activations_norm)
        # savemat(directory_name + '/activation_raw.mat', activations_raw)

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T,
                            color='gray',
                            convolution=args.convolution,
                            pad=0,
                            examples=None,
                            channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in xrange(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                visualize.dispSparseHist(activations_shuffled[layer].reshape(
                    args.dimensions[l][0],
                    data.shape[0] * activations_shuffled[layer].shape[2] *
                    activations_shuffled[layer].shape[3]),
                                         layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(
                video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'],
                                color='gray',
                                convolution=args.convolution,
                                pad=1,
                                examples=100)

        # # visualize reconstruction capabilities
        # if args.convolution == 'n':
        #     visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1)
        # elif args.convolution == 'y':
        #     visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'],
        #                                            color='gray', convolution=args.convolution)
        # print('Reconstructed error: %e' % reconstruction['layer0'])

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'],
                                                dim=dim)
            # print activations_raw['layer0']

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(
                pooled['layer0'][0, :, :, :].reshape(
                    1, pooled['layer0'].shape[1], pooled['layer0'].shape[2],
                    pooled['layer0'].shape[3]),
                dim=dim / 2)

            # visualize an example of a LCNed convolved image after max pooling
            # temp = activations_raw['layer0']    #[0, :, :, :]
            temp = pooled['layer0']  #[0, :, :, :]
            # print temp.shape
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape(
                    (1, 1, dim / 2, dim / 2)),
                                                    kernel_shape=5)
            # temp = scaling.LCNinput(temp, kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=dim / 2)
            # print temp
    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_data = loadmat(file_path)['X']
        test_labels = loadmat(file_path)['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            test_data = np.float32(
                test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])),
                                  int(np.sqrt(test_data.shape[1]))))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:

                test_input = test_data.reshape(
                    test_data.shape[0], test_data.shape[1] *
                    test_data.shape[2] * test_data.shape[3])

                train_input = data.reshape(
                    data.shape[0],
                    data.shape[1] * data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0], test_input[0].shape[1] *
                    test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0], train_input.shape[1] *
                    train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(
                train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" %
                  (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write("\nAccuracy of the classifier at layer %1d: %0.4f" %
                           (layer, accuracy))
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
Example #3
def main():

    # define global parameters
    model_type = ['GroupSF']
    convolution = 'n'
    filename = "unlabeled_10000.mat"
    input_examples = 10000
    channels = 3
    n_filters = [10000]
    n_hidden_previous_layer = 5000
    dimensions = ([n_filters[0], n_hidden_previous_layer], )
    pool = None
    group = 3
    step = 1
    learn_rate = 0.001  # 0.0001
    iterations = [10]
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 1000
    lcn_kernel = [
        5, 5, 3, 3, 3
    ]  # these may have to be odd values so that there is a middle
    aws = 'y'

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = None
    if filename == 'train.mat' or filename == 'unlabeled_10000.mat':
        data = loadmat(file_path)['X']
    elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5':
        data = h5py.File(file_path, 'r')['X']
        data = np.array(data)
        data = data.T

    # preprocess the data and convert to float
    print "pre-processing data..."
    data = np.float32(
        data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)),
                     int(np.sqrt(data.shape[1] / 3))))
    data = data[0:examples, :, :, :]
    print data.shape
    if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat':
        for channel in range(channels):
            data[:, channel, :, :] = np.reshape(
                scaling.LCNinput(data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3])),
                                 kernel_shape=9),
                (data.shape[0], data.shape[2], data.shape[3]))

    # create sub-folder for saved model
    directory_name = None
    if aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # load in the front-end of the model and obtain output
    frontend_model_directory_name = "./saved/2016-01-26_18h54m23s"
    if aws == 'y':
        k.key = frontend_model_directory_name + '/model.pkl'
        model = pickle.loads(k.get_contents_as_string())

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # get output of frontend model to treat as input to DCTSF
    print "getting output of frontend model..."
    batch_size_out_data = 50
    train_input = np.zeros((input_examples, n_hidden_previous_layer, 1, 1))
    n_batches = input_examples / batch_size_out_data
    for batch in xrange(n_batches):
        print "for batch %d" % batch
        batch_start = batch * batch_size_out_data
        batch_end = batch_start + batch_size_out_data
        temp = test[model.n_layers - 1](data[batch_start:batch_end])
        train_input[batch_start:batch_end] = np.sum(temp[0],
                                                    axis=(2, 3),
                                                    keepdims=True)
    train_input = train_input.reshape(
        train_input.shape[0],
        train_input.shape[1] * train_input.shape[2] * train_input.shape[3])

    # normalize the output of the frontend model
    train_input -= train_input.mean(axis=1)[:, np.newaxis]
    train_input /= np.std(train_input, axis=1)[:, np.newaxis]

    # make the data float32
    train_input = np.float32(train_input)
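    # train_input is now a float32 matrix of shape (input_examples, n_hidden_previous_layer),
    # matching the GroupSF weight_dims [n_filters[0], n_hidden_previous_layer] defined above.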

    # determine number of batches
    n_batches, rem = divmod(data.shape[0], batch_size)

    # construct the network
    print "building model..."
    model = sf.Network(model_type=model_type,
                       weight_dims=dimensions,
                       p=pool,
                       group_size=group,
                       step=step,
                       lr=learn_rate,
                       opt=opt,
                       c=convolution,
                       test=test_model,
                       batch_size=batch_size,
                       random='y',
                       weights=None,
                       lcn_kernel=lcn_kernel)

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(train_input)

    # train the sparse filtering network
    print "training network..."
    start_time = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[l]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - start_time
    print('Elapsed training time: %f' % elapsed)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
    if aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt",
                    "wb")  # todo: create log file by looping through args
    for m in range(len(model_type)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, model_type[m], dimensions[m], iterations[m]))
        if model_type[m] == 'GroupSF' or model_type[m] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (group, step))
        ex = data.shape[0]
        if examples is not None:
            ex = examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                   (filename, ex, whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    # get output activations for analyses
    for batch in xrange(n_batches):

        # get variables of interest
        activations_norm = {}
        activations_raw = {}
        activations_shuffled = {}
        reconstruction = {}
        error_recon = {}
        pooled = {}

        # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]()
        begin = batch * batch_size
        end = begin + batch_size
        f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](
            train_input[begin:end])

        # define [mini]batch title
        batch_title = 'layer' + '_end' + '_batch' + '%03d' % batch

        # define norm and raw file names
        norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat'

        activations_norm[batch_title] = f_hat
        activations_raw[batch_title] = f
        activations_shuffled[batch_title] = f_hat_shuffled
        reconstruction[batch_title] = err
        error_recon[batch_title] = rec
        pooled[batch_title] = p

        # save model as well as weights and activations separately
        savemat(norm_file_name, activations_norm)

        if aws == 'y':

            k.key = norm_file_name
            k.set_contents_from_filename(norm_file_name)
            os.remove(norm_file_name)

    # output helper file for concatenating activations
    helper = {'batches': n_batches, 'output_size': f_hat.shape}
    helper_file_name = directory_name + '/helper.mat'
    savemat(helper_file_name, helper)
    if aws == 'y':
        k.key = helper_file_name
        k.set_contents_from_filename(helper_file_name)
        os.remove(helper_file_name)
Example #4
print "loading data..."
base_path = os.path.dirname(__file__)
file_path = os.path.join(base_path, "data", filename)
data = loadmat(file_path)['X']

# preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
print "pre-processing data..."
if filename == 'training.mat':
    data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
    data = data[:, :, :, :]
    channels = data.shape[1]
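    # NOTE: the check below can never be true inside this 'training.mat' branch; it appears to
    # be carried over from the other scripts, so the LCN step is effectively skipped here.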
    if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat':
        for channel in range(channels):
            data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :].
                                                                 reshape((data.shape[0], 1,
                                                                          data.shape[2],
                                                                          data.shape[3])),
                                                                 kernel_shape=9), (
                                                data.shape[0],
                                                data.shape[2],
                                                data.shape[3])
                                                )
elif filename == 'patches.mat':
    data -= data.mean(axis=0)
    data = np.float32(data.T)
    channels = 1

# set some more parameters
aws = 'y'
convolutional = 'n'
neurons = 625
Example #5
def main():

    # define global parameters
    model_type = [
        'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF',
        'ConvolutionalSF', 'ConvolutionalSF'
    ]
    convolution = 'y'
    filename = "unlabeled_10000.mat"  # train  # unlabeled  # STL_10_lcn_unlabeled.mat.h5
    channels = 3
    patch_size = 14
    n_filters = [
        100,
        400,
        1600,
        3000,
        5000,
    ]
    # [100, 400, 1600, 6400, 25600]  # 1600  # increasing neurons x4 maintains dimensionality
    dimensions = (
        [n_filters[0], channels, 11, 11],
        [n_filters[1], n_filters[0], 4, 4],  # 6
        [n_filters[2], n_filters[1], 3, 3],  # 4
        [n_filters[3], n_filters[2], 2, 2],  # 3
        [n_filters[4], n_filters[3], 3, 3]  # 2
    )
    # ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001  # 0.001
    iterations = [
        1,
        1,
        1,
        1,
        1,
        # 1
    ]  # [5, 5, 5]  # [50]  # [100]
    verbosity = 0
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 10  # 100  # 360  # 8000
    lcn_kernel = [
        5, 5, 3, 3, 3
    ]  # these may have to be odd values so that there is a middle
    aws = 'y'

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = None
    if filename == 'train.mat' or filename == 'unlabeled_10000.mat':
        data = loadmat(file_path)['X']
    elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5':
        data = h5py.File(file_path, 'r')['X']
        data = np.array(data)
        data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    print "pre-processing data..."
    data = np.float32(
        data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)),
                     int(np.sqrt(data.shape[1] / 3))))
    data = data[0:examples, :, :, :]
    print data.shape
    if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat':
        for channel in range(channels):
            data[:, channel, :, :] = np.reshape(
                scaling.LCNinput(data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3])),
                                 kernel_shape=9),
                (data.shape[0], data.shape[2], data.shape[3]))

    # determine number of batches
    n_batches, rem = divmod(data.shape[0], batch_size)

    # construct the network
    print "building model..."
    model = sf.Network(model_type=model_type,
                       weight_dims=dimensions,
                       p=pool,
                       group_size=group,
                       step=step,
                       lr=learn_rate,
                       opt=opt,
                       c=convolution,
                       test=test_model,
                       batch_size=batch_size,
                       random='y',
                       weights=None,
                       lcn_kernel=lcn_kernel)

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    start_time = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[l]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - start_time
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    directory_name = None
    if aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
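    # note: os.mkdir is not recursive, so the ./saved parent directory must already
    # exist (os.makedirs would create intermediate directories if needed)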

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt",
                    "wb")  # todo: create log file by looping through args
    for m in range(len(model_type)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
            % (m, model_type[m], dimensions[m], iterations[m]))
        if model_type[m] == 'GroupSF' or model_type[m] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (group, step))
        ex = data.shape[0]
        if examples is not None:
            ex = examples

    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                   (filename, ex, whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")
Example No. 6
0
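
# A minimal import block for this example; a sketch, assuming the module layout
# of the surrounding project: the local module names (sparse_filtering, scaling,
# quadrant_pooling) are guesses and may differ in the actual repository.
import os
import time
import cPickle as pickle  # or plain pickle

import h5py  # only needed for the HDF5 data variants
import numpy as np
from scipy.io import loadmat, savemat

import sparse_filtering as sf         # assumed name of the module providing sf.Network
import scaling                        # assumed helper module providing LCNinput
from scaling import quadrant_pooling  # assumed location of quadrant_pooling
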
def main():

    """
    This script builds a deep convolutional sparse filtering network that has a final output [examples, maps, 1, 1],
    such that the entire image is viewed. The outputs of the final layer are concatenated together and serve as input
    to a new fully-connected network that uses the original sparse filtering object. The outputs of this fully connected
    layer are then used as input to a supervised classifier to evaluate the degree to which object categories are
    represented using fully unsupervised-learning.

    The standard sparse filtering algorithm can be replaced with the topographic version to evaluate semantic
    organization of high-level feature detectors.
    """

    # define global parameters
    model_type = [
        'ConvolutionalSF',
        'ConvolutionalSF',
        'ConvolutionalSF',
        'ConvolutionalSF',
        # 'ConvolutionalSF'
    ]
    convolution = 'y'
    filename = "train.mat"  # unlabeled
    channels = 3
    patch_size = 14
    n_filters = [
        100,
        200,
        400,
        800,
        # 1600
    ]  #
    # [100, 400, 1600, 6400, 25600]  # 1600  # increasing neurons x4 maintains dimensionality
    dimensions = (
        [n_filters[0], channels, 11, 11],
        [n_filters[1], n_filters[0], 4, 4],
        [n_filters[2], n_filters[1], 3, 3],
        [n_filters[3], n_filters[2], 2, 2],
        # [n_filters[4], n_filters[3], 3, 3]
    )
    # ([n_filters, patch_size * patch_size * channels],)  # ([100, 256],)
    pool = None
    group = None
    step = None
    learn_rate = 0.001  # 0.0001
    iterations = [
        1,
        1,
        1,
        1,
        # 1
    ]  # [5, 5, 5]  # [50]  # [100]
    verbosity = 0
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 100  # 360  # 8000
    lcn_kernel = [5, 4, 3, 2]
    aws = 'y'

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    # data = h5py.File(file_path, 'r')['X']
    # data = np.array(data)
    # data = data.T

    # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
    print "pre-processing data..."
    data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
    data = data[0:examples, :, :, :]
    for channel in range(channels):
        data[:, channel, :, :] = np.reshape(
            scaling.LCNinput(data[:, channel, :, :].reshape(
                (data.shape[0], 1, data.shape[2], data.shape[3])),
                             kernel_shape=9),
            (data.shape[0], data.shape[2], data.shape[3]))

    # determine number of batches
    n_batches, rem = divmod(data.shape[0], batch_size)

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=model_type,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test_model,
        batch_size=batch_size,
        random='y',
        weights=None,
        lcn_kernel=lcn_kernel
    )

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
    start_time = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[l]):

            # go through [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(np.random.randint(data.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - start_time
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    directory_name = None
    if aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # save the cost functions
    savemat(directory_name + '/cost.mat', cost)
    if aws == 'y':
        k.key = directory_name + '/cost.mat'
        k.set_contents_from_filename(directory_name + '/cost.mat')
        os.remove(directory_name + '/cost.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")  # todo: create log file by looping through args
    for m in range(len(model_type)):
        log_file.write(
            "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m,
                                                                                    model_type[m],
                                                                                    dimensions[m],
                                                                                    iterations[m])
        )
        if model_type[m] == 'GroupSF' or model_type[m] == 'GroupConvolutionalSF':
            log_file.write(
                " Groups: %d \n Step: %d" % (group, step)
            )
        ex = data.shape[0]
        if examples is not None:
            ex = examples

    log_file.write(
        " Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening)
    )
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    # collect the activations from the last layer to train fully connected layer
    activations_concatenated = []
    for batch in xrange(n_batches):
        begin = batch * batch_size
        end = begin + batch_size
        f_hat, _, _, _, _, _ = outputs[model.n_layers - 1](data[begin:end])
        activations_concatenated.append(
            f_hat.reshape(
                batch_size,
                f_hat.shape[1] * f_hat.shape[2] * f_hat.shape[3]
            )
        )

    # stack the per-batch outputs into a single [examples, features] matrix and
    # zero-center each example's activation vector
    final_input = np.concatenate(activations_concatenated, axis=0)
    final_input -= final_input.mean(axis=1)[:, np.newaxis]

    # train a regular sparse filtering network on top of final layer
    print "building model..."
    model = sf.Network(
        model_type=['SparseFilter'],
        weight_dims=([1089, final_input.shape[1]],),  # 33x33 is odd perfect square
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c='n',
        test=test_model,
        batch_size=batch_size,
        random='y',
        weights=None
    )

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(final_input)

    # train the sparse filtering network
    print "training network..."
    start_time = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[l]):

            # go through [mini]batches
            for batch_index in xrange(n_batches):

                # create index for a random [mini]batch drawn from the concatenated activations
                index = np.int32(np.random.randint(final_input.shape[0], size=batch_size))

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - start_time
    print('Elapsed training time: %f' % elapsed)

    # save the model for later use
    full_path = directory_name + '/model2.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if test_model == 'y':
        print 'testing...'

        from sklearn import svm

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"  # "train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"  # "test.mat"
        test_labels_file = "test.mat"
        batch_size = 100

        # todo: read in lcn data
        # load in STL-10 training data (all pre-normalized using LCN)
        print "loading in training and test data..."
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_labels = loadmat(file_path)['y']

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)['y']

        # # preprocess training and test data
        # print "preprocessing training and test data..."
        # print train_data.shape

        # train_data = np.float32(train_data.reshape(-1,
        #                                            3,
        #                                            int(np.sqrt(train_data.shape[1] / 3)),
        #                                            int(np.sqrt(train_data.shape[1] / 3)))
        #                         )

        # print train_data.shape
        # for channel in range(channels):
        #     train_data[:, channel, :, :] = np.reshape(scaling.LCNinput(train_data[:, channel, :, :].
        #                                                                reshape((train_data.shape[0], 1,
        #                                                                         train_data.shape[2],
        #                                                                         train_data.shape[3])),
        #                                                                kernel_shape=9), (
        #                                               train_data.shape[0],
        #                                               train_data.shape[2],
        #                                               train_data.shape[3]))
        #
        # test_data = np.float32(test_data.reshape(-1,
        #                                          3,
        #                                          int(np.sqrt(test_data.shape[1] / 3)),
        #                                          int(np.sqrt(test_data.shape[1] / 3)))
        #                         )
        # for channel in range(channels):
        #     test_data[:, channel, :, :] = np.reshape(scaling.LCNinput(test_data[:, channel, :, :].
        #                                                               reshape((test_data.shape[0], 1,
        #                                                                        test_data.shape[2],
        #                                                                        test_data.shape[3])),
        #                                                               kernel_shape=9), (
        #                                              test_data.shape[0],
        #                                              test_data.shape[2],
        #                                              test_data.shape[3]))

        # read in the pre-defined fold indices
        file_path = os.path.join(base_path, "data", "train.mat")
        fold_indices = loadmat(file_path)['fold_indices']
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index

        # train and test a SVM classifier for each layer (including pixels as baseline)
        accuracy = {}
        accuracy_list = []
        train_input = None
        test_input = None
        cm = None
        for layer in range(1, 4):  # range(test_model.n_layers + 1):  # skipping pixels for now

            # create dictionary for layer
            accuracy['layer' + str(layer)] = {}

            # create quadrant pooling function based on size of output from layer
            quadrant_size = test[layer - 1](test_data[0, :, :, :].reshape((1, 3, 96, 96)))[0].shape[3] / 2
            quad_pool = quadrant_pooling(quadrant_size)
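            # quadrant_pooling is assumed to pool (sum or average) each feature map
            # over the four spatial quadrants of the layer's output, producing an
            # [examples, n_filters, 2, 2] tensor; that is the shape test_input and
            # train_input are allocated with below.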

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                # get fold data
                fold_index = fold_indices[0][fold].astype('int')
                train_data_fold = np.squeeze(train_data[fold_index])
                train_labels_fold = np.squeeze(train_labels[fold_index])

                # pixel inputs
                if layer == 0:

                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] *
                                                       test_data.shape[2] * test_data.shape[3])

                    train_input = train_data_fold.reshape(train_data_fold.shape[0], train_data_fold.shape[1] *
                                                          train_data_fold.shape[2] * train_data_fold.shape[3])

                # hidden layers
                elif layer > 0:

                    # get the output of the current layer in the model given the training / test data and then reshape
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print "getting test data..."
                        test_input = np.zeros((test_data.shape[0], n_filters[layer - 1], 2, 2))
                        n_batches = test_data.shape[0] / batch_size
                        for batch in xrange(n_batches):
                            print "for batch %d" % batch
                            batch_start = batch * batch_size
                            batch_end = batch_start + batch_size
                            temp = test[layer - 1](test_data[batch_start:batch_end])
                            temp = temp[0]
                            test_input[batch_start:batch_end] = quad_pool(temp)[0]
                        test_input = test_input.reshape(test_input.shape[0], test_input.shape[1] *
                                                        test_input.shape[2] * test_input.shape[3])

                    print "getting training data..."
                    train_input = np.zeros((train_data_fold.shape[0], n_filters[layer - 1], 2, 2))
                    n_batches = train_data_fold.shape[0] / batch_size
                    for batch in xrange(n_batches):
                        print "for batch %d" % batch
                        batch_start = batch * batch_size
                        batch_end = batch_start + batch_size
                        temp = test[layer - 1](train_data_fold[batch_start:batch_end])
                        temp = temp[0]
                        train_input[batch_start:batch_end] = quad_pool(temp)[0]
                    train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] *
                                                      train_input.shape[2] * train_input.shape[3])

                # normalize the inputs for each dimension (zero-mean and unit-variance)
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples]))
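                # note: SVC with kernel="linear" fits one-vs-one binary classifiers for
                # multiclass data; sklearn's LinearSVC (liblinear) is generally faster
                # for linear problems of this size and would be a reasonable substitute.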

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(test_input, test_labels[0:examples])
                accuracy_list.append(accuracy['layer' + str(layer)]['fold' + str(fold)])

                # display results and log them
                print("Accuracy of the classifier for fold %d at layer %1d: %0.4f" %
                      (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]))

                log_file = open(directory_name + "/log_test.txt", "a")
                log_file.write(
                    "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" %
                    (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)])
                )
                log_file.close()

            # calculate and print the average accuracy and std over this layer's folds
            # (accuracy_list accumulates across layers, so compute from the per-layer dict)
            layer_scores = accuracy['layer' + str(layer)].values()
            avg = np.mean(layer_scores)
            std = np.std(layer_scores)
            print "The overall accuracy of layer %d: %0.4f +/- (%0.4f)" % (layer, float(avg), float(std))

        # save for aws
        if aws == 'y':
            k.key = directory_name + "/log_test.txt"
            k.set_contents_from_filename(directory_name + "/log_test.txt")

        # save the test results
        savemat('accuracy', accuracy)
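        # scipy's savemat appends the .mat extension automatically, so this writes
        # accuracy.mat to the current working directory; unlike the other artifacts,
        # it is not written under directory_name or uploaded to S3.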