def get_model(X, batch_size, image_dimension):
    input_shape = (batch_size, 3, image_dimension, image_dimension)
    all_parameters = []

    #############################################
    # a first convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(X, X, input_shape, 32, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # a second convolution with 32 (5, 5) filters
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 32, (5, 5))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # MLP first layer
    output = output.flatten(2)
    output_test = output_test.flatten(2)

    output, output_test, params, output_shape = linear(
        output, output_test,
        (output_shape[0], output_shape[1] * output_shape[2] * output_shape[3]), 500)
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # MLP second layer
    output, output_test, params, output_shape = linear(output, output_test, output_shape, 1)
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'sigmoid')

    #
    return output, output_test, all_parameters

def get_model(X, batch_size, image_dimension):
    input_shape = (batch_size, 3, image_dimension, image_dimension)
    all_parameters = []

    #############################################
    # a first convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(X, X, input_shape, 32, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # a second convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 32, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # a third convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 32, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # MLP first layer
    output = output.flatten(2)
    output_test = output_test.flatten(2)

    output, output_test, params, output_shape = linear(
        output, output_test,
        (output_shape[0], output_shape[1] * output_shape[2] * output_shape[3]), 500)
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # MLP second layer
    output, output_test, params, output_shape = linear(output, output_test, output_shape, 1)
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'sigmoid')

    #
    return output, output_test, all_parameters

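#############################################
# A minimal usage sketch (not part of the original code): how a binary-
# classification get_model like the two listings above might be wired into
# plain SGD training with Theano. The learning rate, batch size, image size
# and the binary cross-entropy objective are assumptions; the sketch also
# assumes that all_parameters holds theano.shared variables.
import theano
import theano.tensor as T

X = T.tensor4('X')                       # (batch, 3, height, width)
y = T.matrix('y')                        # (batch, 1) binary labels

output, output_test, all_parameters = get_model(X, batch_size=32, image_dimension=64)

loss = T.nnet.binary_crossentropy(output, y).mean()
grads = T.grad(loss, all_parameters)
updates = [(p, p - 0.01 * g) for p, g in zip(all_parameters, grads)]

train_fn = theano.function([X, y], loss, updates=updates)    # training pass
predict_fn = theano.function([X], output_test)               # deterministic test-time pass
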
def _build(self, num_classifiers, learning_rate):
    # inputs
    self.X = tf.placeholder(tf.float32, [None, 28, 28])
    self.y = tf.placeholder(tf.int32, [None])
    one_hot_y = tf.one_hot(self.y, 10)

    networks = [layers.convolutional(self.X) for _ in range(num_classifiers)]

    self.individual_loss = [layers.loss(net, one_hot_y) for net in networks]
    self.individual_accuracy = [layers.accuracy(net, one_hot_y) for net in networks]

    logits = tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=one_hot_y)
    self.loss = tf.reduce_mean(cross_entropy)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_op = optimizer.minimize(self.loss)

    correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(one_hot_y, axis=1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    self.prediction = tf.argmax(logits, axis=1)

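#############################################
# A hedged driver sketch (not from the original code) for the ensemble graph
# built by _build above, in TF 1.x style. The Model wrapper, the chosen
# num_classifiers/learning_rate values and the dummy batch are assumptions;
# only the attribute names (X, y, train_op, loss, accuracy,
# individual_accuracy) come from the listing.
import numpy as np
import tensorflow as tf

model = Model(num_classifiers=5, learning_rate=1e-3)      # assumed wrapper calling _build()

batch_images = np.zeros((32, 28, 28), dtype=np.float32)   # placeholder MNIST batch
batch_labels = np.zeros((32,), dtype=np.int32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # one training step on the assumed batch
    _, loss_value, acc_value = sess.run(
        [model.train_op, model.loss, model.accuracy],
        feed_dict={model.X: batch_images, model.y: batch_labels})
    # per-classifier diagnostics of the ensemble members
    individual = sess.run(model.individual_accuracy,
                          feed_dict={model.X: batch_images, model.y: batch_labels})
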
def get_model(X, batch_size, image_dimension):
    input_shape = (batch_size, 3, image_dimension[0], image_dimension[1])
    all_parameters = []
    acc_parameters = []

    #############################################
    # a first convolution with 64 (3, 3) filters
    output, output_test, params, output_shape = convolutional(X, X, input_shape, 64, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # a second convolution with 128 (3, 3) filters
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 128, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # 2 convolutional layers with 256 (3, 3) filters
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 256, (3, 3))
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 256, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    #############################################
    # fully connected layers, implemented as (1, 1) convolutions
    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 1024, (1, 1))
    all_parameters += params

    output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 1024, (1, 1))
    all_parameters += params

    # avgpool with size=(4, 4), then the final (1, 1) convolution onto 10 classes
    output, output_test, params, output_shape = avgpool(output, output_test, output_shape, (4, 4))

    output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 10, (1, 1))
    all_parameters += params

    output, output_test, params, output_shape, cacc_parameters = batch_norm(output, output_test, output_shape)
    acc_parameters += cacc_parameters
    all_parameters += params

    # softmax
    output = multi_dim_softmax(output)
    output_test = multi_dim_softmax(output_test)

    #
    return output, output_test, all_parameters, acc_parameters

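#############################################
# A similar hedged sketch (not from the original code) for the 10-class,
# all-convolutional get_model above, again assuming Theano. Flattening the
# softmax output to (batch, 10) and the categorical cross-entropy objective
# are assumptions about the helpers' output shapes; acc_parameters
# (presumably batch-norm accumulators) is ignored here for brevity.
import theano
import theano.tensor as T

X = T.tensor4('X')
y = T.matrix('y')                        # one-hot labels, shape (batch, 10)

output, output_test, all_parameters, acc_parameters = get_model(
    X, batch_size=32, image_dimension=(32, 32))

probs = output.flatten(2)                # assumed (batch, 10, 1, 1) -> (batch, 10)
loss = T.nnet.categorical_crossentropy(probs, y).mean()

grads = T.grad(loss, all_parameters)
updates = [(p, p - 0.01 * g) for p, g in zip(all_parameters, grads)]
train_fn = theano.function([X, y], loss, updates=updates)
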
def main():
    """ Train and evaluate the model with the chosen parameters. """

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1], test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1], train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)

    # Used for evaluation on the training and testing sets, as the GPU does not hold the
    # full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])

        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:
            kernel_size = [3, 3, 3, 3, 3, 3, 3]
            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1] * 2, 64))

            #if args.layers % 2 == 0:
            #    channels[args.layers//2 - 1] = 128
            #    channels[args.layers//2] = 128
            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2
            #else:
            #    channels[args.layers//2] = 128
            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2

            # weights and biases
            W = []
            b = []

            # convolutions, activations, poolings and normalizations
            s = []
            h = []
            p = []
            n = [x]

            for i in range(args.layers):
                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(initializer('W_conv' + str(i),
                                     [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]], value=args.bias))

                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1] * shape[2] * shape[3]])
        else:
            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear transformations, activations and normalizations
            s = []
            h = []
            n = [x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]], value=args.bias))

                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)
        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight * regularizer('regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)

        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    tformer = transformer.Transformer()
    tformer.convert(np.float32)
    tformer.scale(1. / 255.)

    # Reshape input if necessary.
    if not args.cnn:
        tformer.reshape([None, 784])

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.initialize_all_variables())

        # First run to visualize the initial weights and activations.
        batch = train_dataset.next_batch()
        res = sess.run([optimizer] + W + h,
                       feed_dict={x: tformer.process(batch[0]), y: batch[1]})

        # Get all the weights for the plot.
        weights = {}
        for i in range(len(W)):
            weights['W_' + str(i)] = res[1 + i]

        plots.plot_normalized_histograms(weights, 'images/' + name + '_weights_t0.png')

        # Get all the activations for the plot.
        activations = {}
        for i in range(len(h)):
            activations['h_' + str(i)] = res[1 + len(W) + i]

        plots.plot_normalized_histograms(activations, 'images/' + name + '_activations_t0.png')

        losses = []
        batch_accuracies = []
        train_accuracies = []
        test_accuracies = []

        means = [[] for i in range(args.layers)]
        stds = [[] for i in range(args.layers)]

        iterations = args.epochs * (train_dataset.count // args.batch_size + 1)
        interval = iterations // 20

        for i in range(iterations):
            # If this is the last batch, we reshuffle after this step.
            was_last_batch = train_dataset.next_batch_is_last()
            batch = train_dataset.next_batch()

            res = sess.run([optimizer, cross_entropy, accuracy] + h,
                           feed_dict={x: tformer.process(batch[0]), y: batch[1]})

            # Record mean and standard deviation of each layer's activations.
            for j in range(args.layers):
                means[j].append(np.mean(res[3 + j]))
                stds[j].append(np.std(res[3 + j]))

            losses.append(res[1])
            batch_accuracies.append(res[2])

            print('Loss [%d]: %f' % (i, res[1]))
            print('Batch accuracy [%d]: %f' % (i, res[2]))

            if was_last_batch:
                train_dataset.shuffle()
                train_dataset.reset()

            if i % interval == 0:
                # Accuracy on training set.
                train_accuracy = 0
                batches = test_train_dataset.count // test_train_dataset.batch_size

                for j in range(batches):
                    batch = test_train_dataset.next_batch()
                    train_accuracy += sess.run(accuracy,
                                               feed_dict={x: tformer.process(batch[0]), y: batch[1]})

                train_accuracy = train_accuracy / batches
                train_accuracies.append(train_accuracy)

                print('Train accuracy [%d]: %f' % (i, train_accuracy))
                test_train_dataset.reset()

                # Accuracy on testing set.
                test_accuracy = 0
                batches = test_dataset.count // test_dataset.batch_size

                for j in range(batches):
                    batch = test_dataset.next_batch()
                    test_accuracy += sess.run(accuracy,
                                              feed_dict={x: tformer.process(batch[0]), y: batch[1]})

                test_accuracy = test_accuracy / batches
                test_accuracies.append(test_accuracy)

                print('Test accuracy [%d]: %f' % (i, test_accuracy))
                test_dataset.reset()

                # Plot the loss for each iteration so far.
                plots.plot_lines({'loss': (np.arange(0, i + 1), np.array(losses))},
                                 'images/' + name + '_loss.png')

                statistics = {}
                for j in range(args.layers):
                    statistics['h_' + str(j) + '_mean'] = (np.arange(0, i + 1), np.array(means[j]))
                    statistics['h_' + str(j) + '_std'] = (np.arange(0, i + 1), np.array(stds[j]))

                # Plot activation statistics for each iteration so far.
                plots.plot_lines(statistics, 'images/' + name + '_activations.png')

                # Plot accuracies; train/test accuracy is measured every `interval` iterations.
                plots.plot_lines({
                    'batch_accuracy': (np.arange(0, i + 1), np.array(batch_accuracies)),
                    'train_accuracy': (np.arange(0, i + 1, interval), np.array(train_accuracies)),
                    'test_accuracy': (np.arange(0, i + 1, interval), np.array(test_accuracies)),
                }, 'images/' + name + '_accuracy.png')

        sess.close()

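#############################################
# main() above relies on a module-level args object; a possible argparse setup
# covering the flags it reads is sketched here. Only the attribute names
# (cnn, layers, bias, batch_size, epochs, regularizer_weight) are taken from
# their use in the code; defaults and help strings are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cnn', action='store_true', help='use convolutional instead of fully connected layers')
parser.add_argument('--layers', type=int, default=3, help='number of hidden layers')
parser.add_argument('--bias', type=float, default=0.0, help='constant bias initialization')
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--regularizer_weight', type=float, default=0.0)
args = parser.parse_args()
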
def main():
    """
    Train a model with the given parameters.

    :return: number of iterations, the interval used to measure train/test accuracy,
        per-iteration weight means for each layer, per-iteration activation means for
        each layer, and accuracies including batch, train and test accuracy
    :rtype: (int, int, {'W_0': numpy.ndarray, ...}, {'h_0': numpy.ndarray, ...},
        {'batch_accuracies': numpy.ndarray, 'test_accuracies': numpy.ndarray,
        'train_accuracies': numpy.ndarray})
    """

    # To be sure ...
    tf.reset_default_graph()

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1], test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1], train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)

    # Used for evaluation on the training and testing sets, as the GPU does not hold the
    # full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        tf.set_random_seed(int(time.time() * 1000))

        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])

        if args.normalizer_data:
            n_x = normalizer('n_x', x)
        else:
            n_x = x

        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:
            kernel_size = [3, 3, 3, 3, 3, 3, 3]
            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1] * 2, 64))

            # weights and biases
            W = []
            b = []

            # convolutions, activations, poolings and normalizations
            s = []
            h = []
            p = []
            n = [n_x]

            for i in range(args.layers):
                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(initializer('W_conv' + str(i),
                                     [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]], value=args.bias))

                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1] * shape[2] * shape[3]])
        else:
            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear transformations, activations and normalizations
            s = []
            h = []
            n = [n_x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]], value=args.bias))

                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)
        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight * regularizer('regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)

        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    tformer = transformer.Transformer()
    tformer.convert(np.float32)
    tformer.scale(1. / 255.)

    # Reshape input if necessary.
    if not args.cnn:
        tformer.reshape([None, 784])

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.initialize_all_variables())

        losses = []
        batch_accuracies = []
        train_accuracies = []
        test_accuracies = []

        n_W = args.layers + 2
        n_h = args.layers + 1

        weight_means = [[] for i in range(n_W)]
        activation_means = [[] for i in range(n_h)]

        iterations = args.epochs * (train_dataset.count // args.batch_size + 1)
        interval = iterations // 20

        for i in range(iterations):
            # If this is the last batch, we reshuffle after this step.
            was_last_batch = train_dataset.next_batch_is_last()
            batch = train_dataset.next_batch()

            res = sess.run([optimizer, cross_entropy, accuracy] + W + h,
                           feed_dict={x: tformer.process(batch[0]), y: batch[1]})

            # Record the mean of every weight tensor.
            for j in range(n_W):
                weight_means[j].append(np.mean(res[3 + j]))

            # Record the mean of each layer's activations.
            for j in range(n_h):
                activation_means[j].append(np.mean(res[3 + n_W + j]))

            losses.append(res[1])
            batch_accuracies.append(res[2])

            print('Loss [%d]: %f' % (i, res[1]))
            print('Batch accuracy [%d]: %f' % (i, res[2]))

            if was_last_batch:
                train_dataset.shuffle()
                train_dataset.reset()

            if i % interval == 0:
                # Accuracy on training set.
                train_accuracy = 0
                batches = test_train_dataset.count // test_train_dataset.batch_size

                for j in range(batches):
                    batch = test_train_dataset.next_batch()
                    train_accuracy += sess.run(accuracy,
                                               feed_dict={x: tformer.process(batch[0]), y: batch[1]})

                train_accuracy = train_accuracy / batches
                train_accuracies.append(train_accuracy)

                print('Train accuracy [%d]: %f' % (i, train_accuracy))
                test_train_dataset.reset()

                # Accuracy on testing set.
                test_accuracy = 0
                batches = test_dataset.count // test_dataset.batch_size

                for j in range(batches):
                    batch = test_dataset.next_batch()
                    test_accuracy += sess.run(accuracy,
                                              feed_dict={x: tformer.process(batch[0]), y: batch[1]})

                test_accuracy = test_accuracy / batches
                test_accuracies.append(test_accuracy)

                print('Test accuracy [%d]: %f' % (i, test_accuracy))
                test_dataset.reset()

        sess.close()

    weights = {}
    for j in range(args.layers):
        weights['W_' + str(j)] = np.array(weight_means[j])

    activations = {}
    for j in range(args.layers):
        activations['h_' + str(j)] = np.array(activation_means[j])

    accuracies = {
        'batch_accuracies': batch_accuracies,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies
    }

    return iterations, interval, weights, activations, accuracies

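#############################################
# A brief follow-up sketch (not from the original code): plotting the values
# returned by this main() with the plots.plot_lines helper, whose signature
# (a dict of name -> (x, y) arrays plus an output path) is inferred from its
# use in the earlier listing. Output paths are illustrative.
import numpy as np

iterations, interval, weight_means, activation_means, accuracies = main()

xs = np.arange(0, iterations)
plots.plot_lines({k: (xs, v) for k, v in weight_means.items()},
                 'images/weight_means.png')
plots.plot_lines({k: (xs, v) for k, v in activation_means.items()},
                 'images/activation_means.png')
plots.plot_lines({
    'batch_accuracy': (xs, np.array(accuracies['batch_accuracies'])),
    'train_accuracy': (np.arange(0, iterations, interval),
                       np.array(accuracies['train_accuracies'])),
    'test_accuracy': (np.arange(0, iterations, interval),
                      np.array(accuracies['test_accuracies'])),
}, 'images/accuracy.png')
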