def model(x, y, is_training):
    """Build the batch-normalised MNIST conv classifier and its training ops.

    Parameters
    ----------
    x : Tensor
        Input images; reshaped here to a 4-D tensor of shape [-1, 28, 28, 1].
    y : Tensor
        Target labels with one column per class (they are multiplied with the
        class scores and arg-maxed over axis 1).
    is_training : Tensor or bool
        Forwarded unchanged to every `batch_norm` call.

    Returns
    -------
    list
        `[train_step, accuracy]`: the Adam update op minimising the summed
        cross entropy, and the mean argmax accuracy of the batch.
    """
    # %% We'll convert our MNIST vector data to a 4-D tensor:
    # N x W x H x C
    x_tensor = tf.reshape(x, [-1, 28, 28, 1])

    # %% Batch normalization is applied before each nonlinearity, as the
    # original paper proposes.
    h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                           is_training, scope='bn1'), name='lrelu1')
    h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                           is_training, scope='bn2'), name='lrelu2')
    h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                           is_training, scope='bn3'), name='lrelu3')
    h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
    logits = linear(h_3_flat, 10)
    y_pred = tf.nn.softmax(logits)

    # %% Define loss/eval/training functions
    # The fused op computes -sum(y * log(softmax(logits))) per example without
    # ever taking log(0), which the hand-written form did as soon as one
    # predicted probability underflowed (producing a NaN/inf loss).
    cross_entropy = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    return [train_step, accuracy]
def create_layer_reversed(self, input, prev_layer=None):
    """Build the transposed convolution that mirrors this layer.

    The kernel is looked up (never created) in the 'conv' variable scope under
    the name 'W' followed by the last three characters of `self.name`. The
    result is shaped like `self.input_shape`, which must already be set on
    the instance.

    Parameters
    ----------
    input : Tensor
        Activations to feed through the transposed convolution.
    prev_layer : optional
        Not used by this method.

    Returns
    -------
    Tensor
        lrelu(batch_norm(conv2d_transpose(input, W)) + b).
    """
    kernel_name = 'W{}'.format(self.name[-3:])
    with tf.variable_scope('conv', reuse=True):
        kernel = tf.get_variable(kernel_name)
        # One bias per entry along axis 2 of the shared kernel.
        bias = tf.Variable(tf.zeros([kernel.get_shape().as_list()[2]]))

    height = self.input_shape[1]
    width = self.input_shape[2]
    channels = self.input_shape[3]

    # Batch size is only known at run time, so the target shape is a tensor.
    target_shape = tf.stack([tf.shape(input)[0], height, width, channels])
    deconv = tf.nn.conv2d_transpose(
        input, kernel, target_shape, strides=self.strides, padding='SAME')
    Conv2d.layer_index += 1

    # Restore the static shape information lost by the dynamic output shape.
    deconv.set_shape([None, height, width, channels])
    normalised = tf.contrib.layers.batch_norm(deconv)
    return lrelu(tf.add(normalised, bias))
def create_layer(self, input):
    """Build this layer's convolution, batch norm, bias and leaky ReLU.

    Records the incoming shape on `self.input_shape` and the kernel on
    `self.encoder_matrix`, and increments the class-wide
    `Conv2d.layer_index`.

    Parameters
    ----------
    input : Tensor
        Incoming activations; index 3 of its shape is taken as the number of
        input channels.

    Returns
    -------
    Tensor
        lrelu(batch_norm_layer(conv2d(input, W)) + b).
    """
    self.input_shape = utils.get_incoming_shape(input)
    in_channels = self.input_shape[3]
    suffix = self.name[-2:]

    # A tensor initializer already fixes the variable's shape, so an explicit
    # shape is passed only for every other kind of initializer.
    variable_kwargs = {'initializer': self.initializer}
    if not isinstance(self.initializer, tf.Tensor):
        variable_kwargs['shape'] = (self.kernel_size, 1, in_channels,
                                    self.output_channels)

    with tf.variable_scope('conv', reuse=False):
        kernel = tf.get_variable('W{}'.format(suffix), **variable_kwargs)
        bias = tf.Variable(tf.zeros([self.output_channels]))

    self.encoder_matrix = kernel
    Conv2d.layer_index += 1

    convolved = tf.nn.conv2d(input, kernel, strides=self.strides,
                             padding='SAME')
    normalised = utils.batch_norm_layer(convolved, self.is_training,
                                        'BN{}'.format(suffix))
    return lrelu(tf.add(normalised, bias))
def create_layer_reversed(self, input, prev_layer=None):
    """Apply the decoder-side (transposed) convolution of this layer.

    Re-uses the existing variable 'W' + `self.name[-3:]` from the 'conv'
    scope and produces a tensor with the spatial size and channel count
    stored in `self.input_shape`.

    Parameters
    ----------
    input : Tensor
        Activations to feed through the transposed convolution.
    prev_layer : optional
        Not used by this method.

    Returns
    -------
    Tensor
        The batch-normalised, biased and leaky-ReLU-activated output.
    """
    with tf.variable_scope('conv', reuse=True):
        shared_w = tf.get_variable('W{}'.format(self.name[-3:]))
        bias_len = shared_w.get_shape().as_list()[2]
        offset = tf.Variable(tf.zeros([bias_len]))

    # Static part of the output shape: everything but the batch dimension.
    static_dims = [self.input_shape[1],
                   self.input_shape[2],
                   self.input_shape[3]]

    dynamic_shape = tf.stack([tf.shape(input)[0]] + static_dims)
    result = tf.nn.conv2d_transpose(input, shared_w, dynamic_shape,
                                    strides=self.strides, padding='SAME')
    Conv2d.layer_index += 1
    result.set_shape([None] + static_dims)

    result = tf.add(tf.contrib.layers.batch_norm(result), offset)
    return lrelu(result)
def create_layer(self, input, is_training=True):
    """Build this layer's convolution followed by bias and leaky ReLU.

    Records the incoming shape on `self.input_shape` and the kernel on
    `self.encoder_matrix`, and increments the class-wide
    `Conv2d.layer_index`.

    Parameters
    ----------
    input : Tensor
        Incoming activations; index 3 of its shape is taken as the number of
        input channels.
    is_training : bool, optional
        Currently unused: no batch normalisation is applied in this layer.

    Returns
    -------
    Tensor
        lrelu(conv2d(input, W) + b).
    """
    self.input_shape = utils.get_incoming_shape(input)
    in_channels = self.input_shape[3]
    kernel_shape = (self.kernel_size, self.kernel_size,
                    in_channels, self.output_channels)

    with tf.variable_scope('conv', reuse=False):
        kernel = tf.get_variable('W{}'.format(self.name), shape=kernel_shape)
        bias = tf.Variable(tf.zeros([self.output_channels]))

    self.encoder_matrix = kernel
    Conv2d.layer_index += 1

    convolved = tf.nn.conv2d(input, kernel, strides=self.strides,
                             padding='SAME')
    return lrelu(tf.add(convolved, bias))
def create_layer(self, input):
    """Build this layer's convolution, batch norm, bias and leaky ReLU.

    Records the incoming shape on `self.input_shape` and the kernel on
    `self.encoder_matrix`, and increments the class-wide
    `Conv2d.layer_index`.

    Parameters
    ----------
    input : Tensor
        Incoming activations; index 3 of its shape is taken as the number of
        input channels.

    Returns
    -------
    Tensor
        lrelu(batch_norm(conv2d(input, W)) + b).
    """
    self.input_shape = utils.get_incoming_shape(input)
    in_channels = self.input_shape[3]
    kernel_shape = (self.kernel_size, self.kernel_size,
                    in_channels, self.output_channels)

    # reuse=False: the kernel must not exist yet in the 'conv' scope.
    with tf.variable_scope('conv', reuse=False):
        kernel = tf.get_variable('W{}'.format(self.name[-3:]),
                                 shape=kernel_shape)
        bias = tf.Variable(tf.zeros([self.output_channels]))

    self.encoder_matrix = kernel
    Conv2d.layer_index += 1

    convolved = tf.nn.conv2d(input, kernel, strides=self.strides,
                             padding='SAME')
    normalised = tf.contrib.layers.batch_norm(convolved)
    return lrelu(tf.add(normalised, bias))
def create_layer_reversed(self, input, prev_layer=None, last_layer=False, is_training=True):
    """Build the transposed convolution that mirrors this layer.

    The kernel 'W' + `self.name` is fetched from the 'conv' variable scope
    (opened with `tf.AUTO_REUSE`) and the result is shaped like
    `self.input_shape`, which must already be set on the instance.

    Parameters
    ----------
    input : Tensor
        Activations to feed through the transposed convolution.
    prev_layer : optional
        Not used by this method.
    last_layer : bool, optional
        When true the biased result is returned without the leaky ReLU and
        the add op is named "output".
    is_training : bool, optional
        Currently unused: no batch normalisation is applied in this layer.

    Returns
    -------
    Tensor
        `conv2d_transpose(input, W) + b`, leaky-ReLU-activated unless
        `last_layer` is set.
    """
    with tf.variable_scope('conv', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable('W{}'.format(self.name))
        # One bias per entry along axis 2 of the shared kernel.
        bias = tf.Variable(tf.zeros([kernel.get_shape().as_list()[2]]))

    height = self.input_shape[1]
    width = self.input_shape[2]
    channels = self.input_shape[3]

    # Batch size is only known at run time, so the target shape is a tensor.
    target_shape = tf.stack([tf.shape(input)[0], height, width, channels])
    deconv = tf.nn.conv2d_transpose(
        input, kernel, target_shape, strides=self.strides, padding='SAME')
    Conv2d.layer_index += 1
    deconv.set_shape([None, height, width, channels])

    if last_layer:
        return tf.add(deconv, bias, name="output")
    return lrelu(tf.add(deconv, bias))
def create_layer(self, input):
    """Create the encoder-side convolution of this layer.

    Stores the incoming shape in `self.input_shape`, exposes the kernel as
    `self.encoder_matrix` and bumps the shared `Conv2d.layer_index` counter.

    Parameters
    ----------
    input : Tensor
        Incoming activations; index 3 of its shape is taken as the number of
        input channels.

    Returns
    -------
    Tensor
        The batch-normalised, biased and leaky-ReLU-activated convolution.
    """
    incoming_shape = utils.get_incoming_shape(input)
    self.input_shape = incoming_shape
    n_in = incoming_shape[3]
    n_out = self.output_channels
    side = self.kernel_size

    # reuse=None inherits the reuse setting of the enclosing scope.
    with tf.variable_scope('conv', reuse=None):
        weights = tf.get_variable('W{}'.format(self.name[-3:]),
                                  shape=(side, side, n_in, n_out))
        offset = tf.Variable(tf.zeros([n_out]))

    self.encoder_matrix = weights
    Conv2d.layer_index += 1

    result = tf.nn.conv2d(input, weights, strides=self.strides,
                          padding='SAME')
    result = tf.add(tf.contrib.layers.batch_norm(result), offset)
    return lrelu(result)
def autoencoder(input_shape=[None, 784],
                n_filters=[1, 10, 10, 10],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a deep denoising autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. Either 2-D, in which case the second
        dimension must be a perfect square and is reshaped to a square image,
        or 4-D, which is used as is.
    n_filters : list, optional
        `n_filters[0]` is the channel count used when reshaping a 2-D input;
        each following entry is the number of output filters of one encoder
        layer.
    filter_sizes : list, optional
        Square kernel size of each encoder layer, indexed by layer.
    corruption : bool, optional
        When true the encoder input is passed through `corrupt` first; the
        cost is still measured against the uncorrupted input.

    Returns
    -------
    dict
        'x' : input placeholder to the network,
        'z' : inner-most latent representation,
        'y' : output reconstruction of the input,
        'cost' : summed squared error between 'y' and the reshaped input.

    Raises
    ------
    ValueError
        If the input is neither 4-D nor 2-D with a square number of columns.
    """
    # The default lists are only read, never mutated, so sharing them between
    # calls is harmless.

    # `tf.pack` was renamed `tf.stack` in TensorFlow 1.0 and then removed;
    # use whichever one this installation provides.
    stack = getattr(tf, 'stack', None) or tf.pack

    # %%
    # input to the network
    x = tf.placeholder(tf.float32, input_shape, name='x')

    # %%
    # ensure 2-d is converted to square tensor.
    if len(x.get_shape()) == 2:
        x_dim = np.sqrt(x.get_shape().as_list()[1])
        if x_dim != int(x_dim):
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_filters[0]])
    elif len(x.get_shape()) == 4:
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # %%
    # Optionally apply denoising autoencoder
    if corruption:
        current_input = corrupt(current_input)

    # %%
    # Build the encoder
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape: the decoder restores it.
        shapes.append(current_input.get_shape().as_list())
        W = tf.Variable(
            tf.random_uniform(
                [filter_sizes[layer_i], filter_sizes[layer_i],
                 n_input, n_output],
                -1.0 / math.sqrt(n_input),
                1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d(current_input, W,
                             strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # %%
    # store the latent representation
    z = current_input
    encoder.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same weights (fresh biases only)
    for W, shape in zip(encoder, shapes):
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d_transpose(
                    current_input, W,
                    stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # %%
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    # %%
    return {'x': x, 'z': z, 'y': y, 'cost': cost}
from libs.connections import conv2d, linear
from libs.datasets import MNIST

# %% Setup input to the network and true output label.  These are
# simply placeholders which we'll fill in later.
mnist = MNIST()
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% Define the network: each convolution is batch-normalised before its
# leaky ReLU.
bn1 = batch_norm(-1, name='bn1')
bn2 = batch_norm(-1, name='bn2')
bn3 = batch_norm(-1, name='bn3')
h_1 = lrelu(bn1(conv2d(x_tensor, 32, name='conv1')), name='lrelu1')
h_2 = lrelu(bn2(conv2d(h_1, 64, name='conv2')), name='lrelu2')
h_3 = lrelu(bn3(conv2d(h_2, 64, name='conv3')), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# The fused op computes -sum(y * log(softmax(h_4))) per example without ever
# taking log(0), which the hand-written form did as soon as one predicted
# probability underflowed (producing a NaN/inf loss).
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

# %% We now create a new session to actually perform the initialization the
# variables:
def autoencoder(
        input_shape=[None, 16384],  # [num_examples, num_pixels]
        n_filters=[1, 10, 10, 10],  # number of filters in each conv layer
        filter_sizes=[3, 3, 3, 3]):
    """Build a deep autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. Either 2-D, in which case the second
        dimension must be a perfect square and is reshaped to a square image,
        or 4-D, which is used as is.
    n_filters : list, optional
        `n_filters[0]` is the channel count used when reshaping a 2-D input;
        each following entry is the number of output filters of one encoder
        layer.
    filter_sizes : list, optional
        Square kernel size of each encoder layer, indexed by layer.

    Returns
    -------
    dict
        'x' : input placeholder to the network,
        'z' : inner-most latent representation,
        'y' : output reconstruction of the input,
        'cost' : summed squared error between 'y' and the reshaped input.

    Raises
    ------
    ValueError
        If the input is neither 4-D nor 2-D with a square number of columns.
    """
    # The default lists are only read, never mutated, so sharing them between
    # calls is harmless.

    # `tf.pack` was renamed `tf.stack` in TensorFlow 1.0 and then removed;
    # use whichever one this installation provides.
    stack = getattr(tf, 'stack', None) or tf.pack

    # input to the network
    x = tf.placeholder(tf.float32, input_shape, name='x')

    # ensure 2-d is converted to square tensor.
    if len(x.get_shape()) == 2:
        # The second dimension is the pixel count of one (square) image.
        x_dim = np.sqrt(x.get_shape().as_list()[1])
        if x_dim != int(x_dim):  # not a square image
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        # Reshape to [batch, side, side, channels].
        x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_filters[0]])
    elif len(x.get_shape()) == 4:  # already an image tensor
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # Build the encoder
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        # Channel count of this layer's input.
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape: the decoder restores it.
        shapes.append(current_input.get_shape().as_list())
        # Kernel drawn uniformly from [-1/sqrt(n_input), 1/sqrt(n_input)].
        W = tf.Variable(
            tf.random_uniform(
                [filter_sizes[layer_i], filter_sizes[layer_i],
                 n_input, n_output],
                -1.0 / math.sqrt(n_input),
                1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        # Stride-2 convolution, bias, then the non-linearity.
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d(current_input, W,
                             strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # store the latent representation
    z = current_input
    encoder.reverse()  # going backwards for the decoder
    shapes.reverse()

    # Build the decoder using the same weights as the encoder, but fresh
    # biases.
    for W, shape in zip(encoder, shapes):
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d_transpose(
                    current_input, W,
                    # Output shape; the batch size is only known at run time.
                    stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference between output and input
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    return {'x': x, 'z': z, 'y': y, 'cost': cost}
def main(_):
    """Train a tied-weight convolutional autoencoder to blur MNIST digits.

    The network takes mean-subtracted MNIST images and is trained to output
    their Gaussian-blurred (sigma=1) versions. After training, ten test
    images are shown above the network's output for them.

    Parameters
    ----------
    _ : object
        Ignored.
    """
    # TensorFlow 1.0 renamed `pack` -> `stack` and `initialize_all_variables`
    # -> `global_variables_initializer`; use whichever name exists.
    stack = getattr(tf, 'stack', None) or tf.pack
    init_variables = (getattr(tf, 'global_variables_initializer', None)
                      or tf.initialize_all_variables)

    x = tf.placeholder(tf.float32, shape=[None, 784])
    y = tf.placeholder(tf.float32, shape=[None, 28, 28])
    sess = tf.InteractiveSession()

    im_in = tf.reshape(x, [-1, 28, 28, 1])
    im_in_blur = tf.reshape(y, [-1, 28, 28, 1])

    # Encoder: four 3x3 convolution layers. The input shape of every layer is
    # recorded so the decoder can restore it.
    shape_layer = []
    conv_weights = []
    current = im_in
    for n_in, n_out in [(1, 10), (10, 10), (10, 10), (10, 10)]:
        shape_layer.append(current.get_shape().as_list())
        w_conv = weight_variable([3, 3, n_in, n_out])
        b_conv = bias_variable([n_out])
        current = lrelu(conv2d(current, w_conv) + b_conv)
        conv_weights.append(w_conv)

    # Latent representation
    z = current

    # Decoder: walk the encoder backwards, re-using its weights with fresh
    # biases sized to the channel count each layer restores.
    shape_layer.reverse()
    conv_weights.reverse()
    deconv_biases = [bias_variable([shape[3]]) for shape in shape_layer]
    batch_size = tf.shape(x)[0]  # only known at run time
    current = z
    for w_conv, b_deconv, shape in zip(conv_weights, deconv_biases,
                                       shape_layer):
        out_shape = stack([batch_size, shape[1], shape[2], shape[3]])
        current = lrelu(deconv2d(current, w_conv, out_shape) + b_deconv)

    # Now the output has been reconstructed through the network
    y_out = current

    # Define loss and optimizer
    loss = tf.reduce_sum(tf.square(y_out - im_in_blur))
    train_step = tf.train.AdamOptimizer(1e-2).minimize(loss)
    sess.run(init_variables())

    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
    print(mnist.train.images.shape)
    mean_img = np.mean(mnist.train.images, axis=0)

    batch_size = 100
    n_epoch = 500
    for epoch_i in range(n_epoch):
        for batch_i in range(mnist.train.num_examples // batch_size):
            batch_xs, _ = mnist.train.next_batch(batch_size)
            batch_xs_train = np.array([img - mean_img for img in batch_xs])
            # The training target is the blurred version of the input.
            batch_xs_train_blur = scipy.ndimage.gaussian_filter(
                np.reshape(batch_xs_train, (-1, 28, 28)), sigma=1)
            sess.run(train_step,
                     feed_dict={x: batch_xs_train, y: batch_xs_train_blur})
        # Report the loss on the last batch of the epoch.
        print("epoch: " + str(epoch_i),
              sess.run(loss,
                       feed_dict={x: batch_xs_train, y: batch_xs_train_blur}))

    # Plot example reconstructions
    n_examples = 10
    test_xs, _ = mnist.test.next_batch(n_examples)
    test_xs_norm = np.array([img - mean_img for img in test_xs])
    recon = sess.run(y_out, feed_dict={x: test_xs_norm})
    print(recon.shape)
    fig, axs = plt.subplots(2, n_examples, figsize=(10, 2))
    for example_i in range(n_examples):
        axs[0][example_i].imshow(np.reshape(test_xs[example_i, :], (28, 28)))
        # Add the mean image back before displaying the network output.
        axs[1][example_i].imshow(
            np.reshape(
                np.reshape(recon[example_i, ...], (784, )) + mean_img,
                (28, 28)))
    fig.show()
    plt.draw()
    plt.waitforbuttonpress()

    # Functions for visualizing the response of activation layers
    def getActivations(layers, stimuli, fig_num=1):
        """Evaluate each tensor in `layers` on one stimulus and plot them."""
        units_layers = []
        for layer in layers:
            units = layer.eval(
                session=sess,
                feed_dict={x: np.reshape(stimuli, [1, 784], order='F')})
            print(units.shape)
            units_layers.append(units)
        plotNNFilter(units_layers, fig_num)

    def plotNNFilter(units_layers, fig_num=1):
        """Show every filter response: one row per layer, one column each."""
        filters_maxNum = max(units.shape[3] for units in units_layers)
        # squeeze=False keeps `axs` 2-D even with a single layer or a single
        # filter, so the [row][column] indexing below always works.
        fig_act, axs = plt.subplots(len(units_layers), filters_maxNum,
                                    figsize=(25, 15), squeeze=False)
        for layer_idx, units in enumerate(units_layers):
            for filter_i in range(units.shape[3]):
                ax = axs[layer_idx][filter_i]
                # Title the subplot being drawn; `plt.title` would keep
                # re-titling whichever axes happens to be current.
                ax.set_title('Filter ' + str(filter_i))
                ax.imshow(units[0, :, :, filter_i],
                          interpolation="nearest", cmap="gray")
        fig_act.tight_layout()
        fig_act.show()
        plt.waitforbuttonpress()
def resnet_relu(input):
    """Apply the leaky ReLU (`lrelu`) activation to `input`.

    Parameters
    ----------
    input : Tensor
        Value passed straight through to `lrelu`.

    Returns
    -------
    Tensor
        Whatever `lrelu(input)` returns.
    """
    activated = lrelu(input)
    return activated
# Boolean placeholder forwarded to every batch_norm call below.
is_training = tf.placeholder(tf.bool, name='is_training')

# %% We'll convert our MNIST vector data to a 4-D tensor:
# N x W x H x C
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% We'll use a new method called batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance
# The original paper proposes using this before any nonlinearities.
h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                       is_training, scope='bn1'), name='lrelu1')
h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                       is_training, scope='bn2'), name='lrelu2')
h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                       is_training, scope='bn3'), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# The fused op computes -sum(y * log(softmax(h_4))) per example without ever
# taking log(0), which the hand-written form did as soon as one predicted
# probability underflowed (producing a NaN/inf loss).
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
# %% We add a new type of placeholder to denote when we are training.
# This will be used to change the way we compute the network during
# training/testing.
is_training = tf.placeholder(tf.bool, name='is_training')

# %% We'll convert our MNIST vector data to a 4-D tensor:
# N x W x H x C
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% We'll use a new method called batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance
# The original paper proposes using this before any nonlinearities
h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1'),
                       is_training, scope='bn1'), name='lrelu1')
h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                       is_training, scope='bn2'), name='lrelu2')
h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                       is_training, scope='bn3'), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
# The fused op computes -sum(y * log(softmax(h_4))) per example without ever
# taking log(0), which the hand-written form did as soon as one predicted
# probability underflowed (producing a NaN/inf loss).
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
x_tensor = tf.reshape(x, [-1, 1, 26, 1]) # FOR MFCC-26 dim #x_tensor = tf.reshape(x, [-1, 1, 40, 1]) # FOR CONVAE # %% We'll use a new method called batch normalization. # This process attempts to "reduce internal covariate shift" # which is a fancy way of saying that it will normalize updates for each # batch using a smoothed version of the batch mean and variance # The original paper proposes using this before any nonlinearities h_1 = lrelu(batch_norm(conv2d(x_tensor, 32, name='conv1', stride_h=1, k_h=1, k_w=3, pool_size=[1, 1, 2, 1], pool_stride=[1, 1, 1, 1]), phase_train=is_training, scope='bn1'), name='lrelu1') h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2', stride_h=1, k_h=1, k_w=3, pool_size=[1, 1, 2, 1], pool_stride=[1, 1, 1, 1]), phase_train=is_training,
def autoencoder(input_shape=[None, 784],
                n_filters=[1, 10, 10, 10],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a deep denoising autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. Either 2-D, in which case the second
        dimension must be a perfect square and is reshaped to a square image,
        or 4-D, which is used as is.
    n_filters : list, optional
        `n_filters[0]` is the channel count used when reshaping a 2-D input;
        each following entry is the number of output filters of one encoder
        layer.
    filter_sizes : list, optional
        Square kernel size of each encoder layer, indexed by layer.
    corruption : bool, optional
        When true the encoder is fed `corrupt(x)` instead of `x`; the cost is
        measured against the uncorrupted input.

    Returns
    -------
    dict
        'x' : input placeholder to the network,
        'z' : inner-most latent representation,
        'y' : output reconstruction of the input,
        'cost' : summed squared error between 'y' and the clean, reshaped
        input.

    Raises
    ------
    ValueError
        If the input is neither 4-D nor 2-D with a square number of columns.
    """
    # The default lists are only read, never mutated, so sharing them between
    # calls is harmless.

    # `tf.pack` was renamed `tf.stack` in TensorFlow 1.0 and then removed;
    # use whichever one this installation provides.
    stack = getattr(tf, 'stack', None) or tf.pack

    # %%
    # input to the network
    x = tf.placeholder(
        tf.float32, input_shape, name='x')

    # %%
    # Optionally apply denoising autoencoder
    if corruption:
        x_noise = corrupt(x)
    else:
        x_noise = x

    # %%
    # ensure 2-d is converted to square tensor.
    # `x_tensor` is always the CLEAN input, because it is the reconstruction
    # target; only the encoder input carries the noise. Building the target
    # from `x_noise` would train the network to reproduce the corruption.
    if len(x.get_shape()) == 2:
        x_dim = np.sqrt(x.get_shape().as_list()[1])
        if x_dim != int(x_dim):
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        image_shape = [-1, x_dim, x_dim, n_filters[0]]
        x_tensor = tf.reshape(x, image_shape)
        if corruption:
            current_input = tf.reshape(x_noise, image_shape)
        else:
            current_input = x_tensor
    elif len(x.get_shape()) == 4:
        x_tensor = x
        current_input = x_noise
    else:
        raise ValueError('Unsupported input dimensions')

    # %%
    # Build the encoder
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape: the decoder restores it.
        shapes.append(current_input.get_shape().as_list())
        W = tf.Variable(
            tf.random_uniform(
                [filter_sizes[layer_i], filter_sizes[layer_i],
                 n_input, n_output],
                -1.0 / math.sqrt(n_input),
                1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d(current_input, W,
                             strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # %%
    # store the latent representation
    z = current_input
    encoder.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same weights (fresh biases only)
    for W, shape in zip(encoder, shapes):
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        current_input = lrelu(
            tf.add(
                tf.nn.conv2d_transpose(
                    current_input, W,
                    stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                    strides=[1, 2, 2, 1], padding='SAME'),
                b))

    # %%
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference to the clean input
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    # %%
    return {'x': x, 'z': z, 'y': y, 'cost': cost}