def discriminator(x, phase_train=False, n_features=64, reuse=False):
    """Four-layer strided-convolution discriminator.

    Returns both the sigmoid probability and the pre-activation logits.
    """
    with tf.variable_scope("discriminator", reuse=reuse):
        with tf.variable_scope("0", reuse=reuse):
            conv0, W0 = utils.conv2d(x, n_features, name='d_h0_conv')
            h0 = utils.lrelu(conv0)
        with tf.variable_scope("1", reuse=reuse):
            conv1, W1 = utils.conv2d(h0, n_features * 2, name='d_h1_conv')
            norm1 = utils.batch_norm()
            norm1_1 = norm1(conv1, phase_train)
            h1 = utils.lrelu(norm1_1)
        with tf.variable_scope("2", reuse=reuse):
            conv2, W2 = utils.conv2d(h1, n_features * 4, name='d_h2_conv')
            norm2 = utils.batch_norm()
            norm2_1 = norm2(conv2, phase_train)
            h2 = utils.lrelu(norm2_1)
        with tf.variable_scope("3", reuse=reuse):
            conv3, W3 = utils.conv2d(h2, n_features * 8, name='d_h3_conv')
            norm3 = utils.batch_norm()
            norm3_1 = norm3(conv3, phase_train)
            h3 = utils.lrelu(norm3_1)
        with tf.variable_scope("4", reuse=reuse):
            h4, W4 = utils.linear(tf.reshape(h3, [-1, 8192]), n_output=1,
                                  name='d_h3_lin')
    return tf.nn.sigmoid(h4), h4
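# --- Hedged usage sketch (not from the original source). The `reuse` flag
# supports the usual GAN pattern: score a real batch, then score a generated
# batch with the same variables. Assumes 64x64 RGB inputs so the hard-coded
# 8192-dim flatten above (4*4*512 with n_features=64) matches, and assumes
# the same `utils` module the function itself relies on.
import tensorflow as tf

tf.reset_default_graph()
real_images = tf.placeholder(tf.float32, [None, 64, 64, 3], 'real')
fake_images = tf.placeholder(tf.float32, [None, 64, 64, 3], 'fake')  # e.g. a generator output
D_real, D_real_logits = discriminator(real_images, phase_train=True)
# The second call shares the first call's variables instead of creating new ones:
D_fake, D_fake_logits = discriminator(fake_images, phase_train=True, reuse=True)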
def generator(x, name='upsample'):
    with tf.variable_scope(name):
        # forcing the input to have a known name
        x = tf.identity(x, name='x')
        shape = x.get_shape()
        height = shape[1].value
        width = shape[2].value
        output_channels = shape[3].value
        with tf.variable_scope('residuals_lo'):
            with tf.variable_scope('prepass'):
                h, _ = conv2d(x, n_output=32, d_h=1, d_w=1, name='1')
                h = tf.nn.elu(h, name='1/elu')
                x = h
                h, _ = conv2d(h, n_output=32, d_h=2, d_w=2, name='2')
                h = tf.nn.elu(h, name='2/elu')
            channels = [32, 64, 32]
            for i, c in enumerate(channels):
                h, _ = conv2d(h, n_output=c, d_h=1, d_w=1,
                              name='conv/{}'.format(i))
                h = tf.nn.elu(h, name='conv/{}/elu'.format(i))
            h, _ = deconv2d(h, n_output_h=height, n_output_w=width,
                            n_output_ch=32, name='upscaling')
            h = tf.nn.elu(h, name='upscaling/elu')
            h = tf.add(h, x, name='join')
        with tf.variable_scope('clean_pass'):
            h, _ = conv2d(h, n_output=output_channels, d_h=1, d_w=1)
            h = tf.nn.relu(h, name='relu')
        # forcing the output to have a known name
        y = tf.identity(h, name='y')
    return y
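# --- Hedged usage sketch (illustrative shapes only). The generator reads
# height/width from its input at graph-build time, so the placeholder needs
# fully known spatial dimensions; the output keeps the input's size and
# channel count, refined by the residual pass.
import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 128, 128, 3], 'input')
y = generator(x, name='upsample')  # y: [None, 128, 128, 3]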
def createConvNet(Xs, windowSize, n_classes):
    # ---------- create ConvNet
    tf.reset_default_graph()
    X = tf.placeholder(name='X',
                       shape=(None, Xs.shape[1], Xs.shape[2], Xs.shape[3]),
                       dtype=tf.float32)
    Y = tf.placeholder(name='Y', shape=(None, n_classes), dtype=tf.float32)

    # TODO: Explore different numbers of layers, and sizes of the network
    n_filters = [20, 20, 20]

    # Now let's loop over our n_filters and create the deep convolutional
    # neural network
    H = X
    for layer_i, n_filters_i in enumerate(n_filters):
        # Let's use the helper function to create our connection to the next layer:
        # TODO: explore changing the parameters here:
        H, W = utils.conv2d(H, n_filters_i,
                            k_h=2, k_w=2, d_h=2, d_w=2,
                            name=str(layer_i))
        # And use a nonlinearity
        # TODO: explore changing the activation here:
        H = tf.nn.softplus(H)
        # Just to check what's happening:
        print(H.get_shape().as_list())

    # Connect the last convolutional layer to a fully connected network
    fc, W = utils.linear(H, n_output=100, name="fcn1", activation=tf.nn.relu)
    # fc2, W = utils.linear(fc, n_output=50, name="fcn2", activation=tf.nn.relu)
    # fc3, W = utils.linear(fc2, n_output=10, name="fcn3", activation=tf.nn.relu)

    # And another fully connected layer, now with just n_classes outputs, the
    # number of outputs that our one hot encoding has
    Y_pred, W = utils.linear(fc, n_output=n_classes, name="pred",
                             activation=tf.nn.sigmoid)
    return X, Y, Y_pred
def encoder(x, is_training, channels, filter_sizes, activation=tf.nn.tanh,
            reuse=None):
    # Set the input to a common variable name, h, for hidden layer
    h = x
    print('encoder/input:', h.get_shape().as_list())
    # Now we'll loop over the list of dimensions defining the number
    # of output filters in each layer, and collect each hidden layer
    hs = []
    for layer_i in range(len(channels)):
        with tf.variable_scope('layer{}'.format(layer_i + 1), reuse=reuse):
            # Convolve using the utility convolution function.
            # This requires the number of output filters and the size of the
            # kernel in `k_h` and `k_w`. By default, this will use a stride
            # of 2, meaning each new layer will be downsampled by 2.
            h, W = utils.conv2d(h, channels[layer_i],
                                k_h=filter_sizes[layer_i],
                                k_w=filter_sizes[layer_i],
                                d_h=2, d_w=2,
                                reuse=reuse)
            h = utils.batch_norm(h, is_training)
            # Now apply the activation function
            h = activation(h)
            print('layer:', layer_i, ', shape:', h.get_shape().as_list())
        # Store each hidden layer
        hs.append(h)
    # Finally, return the encoding.
    return h, hs
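# --- Hedged shape walk-through (illustrative sizes, not from the source).
# Each stride-2 convolution halves the spatial resolution, so a 64x64 input
# through channels=[32, 64, 128] yields 32x32, 16x16 and 8x8 feature maps,
# all collected in `hs`:
import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 64, 64, 3], 'x')
is_training = tf.placeholder(tf.bool, name='is_training')
h, hs = encoder(x, is_training, channels=[32, 64, 128],
                filter_sizes=[3, 3, 3], activation=tf.nn.relu)
# h is the deepest encoding: [None, 8, 8, 128]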
def VAE(input_shape=[None, 784],
        output_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        softmax=False,
        classifier='alexnet_v2'):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    output_shape : list, optional
        Shape of the reconstruction target `t`. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, this refers to the total number of output
        filters to create for each layer, with each layer's number of output
        filters as a list.
        If convolutional=False, then this refers to the total number of
        neurons for each layer in a fully connected network.
    filter_sizes : list, optional
        Only applied when convolutional=True. This refers to the ksize
        (height and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True. This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding. After the variational embedding, another fully
        connected layer is created with the same size prior to decoding.
        Set to 0 to not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True. This refers to the number of
        latent Gaussians to sample for creating the innermost encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout. If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary. 1.0 means no
        dropout is used. 0.0 means every connection is dropped. Sensible
        values are between 0.5-0.8.
    denoising : bool, optional
        Whether or not to apply denoising. If using denoising, you must feed
        a value for 'corrupt_rec', as returned in the dictionary. 0.0 means
        no corruption is used. 1.0 means every feature is corrupted.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully
        connected network will be created. This affects the n_filters
        parameter's meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer. This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer. The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.
    softmax : bool, optional
        Whether or not to attach a softmax classification network on top of
        the reconstruction network.
    classifier : str, optional
        Which slim classifier to use when softmax=True: 'squeezenet',
        'zigzagnet', 'alexnet_v2', 'inception_v1', 'inception_v2' or
        'inception_v3'.

    Returns
    -------
    model : dict
        {
            'cost_vae': Reconstruction (+ KL) cost to optimize.
            'cost_s': Classification cost (0 unless softmax=True).
            'loss_x', 'loss_z': Individual loss terms.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            't': Reconstruction-target Placeholder
            'label': Class-label Placeholder
            'label_onehot', 'predictions', 'acc': Classifier tensors.
            'z': Innermost encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_rec', 'corrupt_cls': Corruption rates when Denoising
            'var_prob': Ratio of reconstruction to input variance.
            'train': Set to True when training/Applies to Batch Normalization.
            'merged': Merged TensorBoard summaries.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    t = tf.placeholder(tf.float32, output_shape, 't')
    label = tf.placeholder(tf.int32, [None], 'label')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_rec = tf.placeholder(tf.float32, name='corrupt_rec')
    corrupt_cls = tf.placeholder(tf.float32, name='corrupt_cls')

    # input of the reconstruction network
    # np.tanh(2) = 0.964
    current_input1 = utils.corrupt(x) * corrupt_rec + x * (1 - corrupt_rec) \
        if (denoising and phase_train is not None) else x
    current_input1.set_shape(x.get_shape())
    # 2d -> 4d if convolution
    current_input1 = utils.to_tensor(current_input1) \
        if convolutional else current_input1

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input1.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input1,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input1, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input1 = h

    shapes.append(current_input1.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input1.get_shape().as_list()
            flattened = utils.flatten(current_input1)
            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened
            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]
            # modified by yidawang
            # s, u, v = tf.svd(z_log_sigma)
            # z_log_sigma = tf.matmul(
            #     tf.matmul(u, tf.diag(s)), tf.transpose(v))
            # end yidawang

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))
            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))
            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z
            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input1 = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input1 = tf.nn.dropout(current_input1, keep_prob)
            if convolutional:
                current_input1 = tf.reshape(
                    current_input1,
                    tf.stack([
                        tf.shape(current_input1)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input1

    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()
    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input1,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input1, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input1 = h

    y = current_input1
    t_flat = utils.flatten(t)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_mean(
        tf.reduce_sum(tf.squared_difference(t_flat, y_flat), 1))
    loss_z = 0

    if variational:
        # Variational lower bound, kl-divergence
        loss_z = tf.reduce_mean(-0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma -
            tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1))

        # Add l2 loss
        cost_vae = tf.reduce_mean(loss_x + loss_z)
    else:
        # Just optimize l2 loss
        cost_vae = tf.reduce_mean(loss_x)

    # Alexnet for classification based on softmax using TensorFlow slim
    if softmax:
        axis = list(range(len(x.get_shape())))
        # NOTE: `phase_train is True` is a Python identity check on the
        # placeholder, evaluated once at graph-construction time (always
        # False), not a run-time branch.
        mean1, variance1 = tf.nn.moments(t, axis) \
            if (phase_train is True) else tf.nn.moments(x, axis)
        mean2, variance2 = tf.nn.moments(y, axis)
        var_prob = variance2 / variance1

        # Input of the classification network
        current_input2 = utils.corrupt(x) * corrupt_cls + \
            x * (1 - corrupt_cls) \
            if (denoising and phase_train is True) else x
        current_input2.set_shape(x.get_shape())
        current_input2 = utils.to_tensor(current_input2) \
            if convolutional else current_input2

        y_concat = tf.concat([current_input2, y], 3)
        with tf.variable_scope('deconv/concat'):
            shape = shapes[layer_i + 1]
            if convolutional:
                # The input of the classification network is twice the size
                # of the input of the reconstruction network:
                # 112->224 for alexNet and 150->300 for inception v3 and v4
                y_concat, W = utils.deconv2d(
                    x=y_concat,
                    n_output_h=y_concat.get_shape()[1] * 2,
                    n_output_w=y_concat.get_shape()[1] * 2,
                    n_output_ch=y_concat.get_shape()[3],
                    n_input_ch=y_concat.get_shape()[3],
                    k_h=3,
                    k_w=3)
                Ws.append(W)

        # The following are optional networks for classification
        if classifier == 'squeezenet':
            predictions, net = squeezenet.squeezenet(y_concat, num_classes=13)
        elif classifier == 'zigzagnet':
            predictions, net = squeezenet.zigzagnet(y_concat, num_classes=13)
        elif classifier == 'alexnet_v2':
            predictions, end_points = alexnet.alexnet_v2(y_concat,
                                                         num_classes=13)
        elif classifier == 'inception_v1':
            predictions, end_points = inception.inception_v1(y_concat,
                                                             num_classes=13)
        elif classifier == 'inception_v2':
            predictions, end_points = inception.inception_v2(y_concat,
                                                             num_classes=13)
        elif classifier == 'inception_v3':
            predictions, end_points = inception.inception_v3(y_concat,
                                                             num_classes=13)

        label_onehot = tf.one_hot(label, 13, axis=-1, dtype=tf.int32)
        cost_s = tf.losses.softmax_cross_entropy(label_onehot, predictions)
        cost_s = tf.reduce_mean(cost_s)
        acc = tf.nn.in_top_k(predictions, label, 1)
    else:
        predictions = tf.one_hot(label, 13, 1, 0)
        label_onehot = tf.one_hot(label, 13, 1, 0)
        cost_s = 0
        acc = 0
        # var_prob is only computed in the softmax branch; define a constant
        # here so the summary below still builds
        var_prob = tf.constant(0.0)

    # Using Summaries for Tensorboard
    tf.summary.scalar('cost_vae', cost_vae)
    tf.summary.scalar('cost_s', cost_s)
    tf.summary.scalar('loss_x', loss_x)
    tf.summary.scalar('loss_z', loss_z)
    tf.summary.scalar('corrupt_rec', corrupt_rec)
    tf.summary.scalar('corrupt_cls', corrupt_cls)
    tf.summary.scalar('var_prob', var_prob)
    merged = tf.summary.merge_all()

    return {
        'cost_vae': cost_vae,
        'cost_s': cost_s,
        'loss_x': loss_x,
        'loss_z': loss_z,
        'Ws': Ws,
        'x': x,
        't': t,
        'label': label,
        'label_onehot': label_onehot,
        'predictions': predictions,
        'z': z,
        'y': y,
        'acc': acc,
        'keep_prob': keep_prob,
        'corrupt_rec': corrupt_rec,
        'corrupt_cls': corrupt_cls,
        'var_prob': var_prob,
        'train': phase_train,
        'merged': merged
    }
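# --- Hedged training-step sketch for the model above. Shapes, rates and the
# random stand-in batch are assumptions for illustration, not values from
# the source:
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
net = VAE(input_shape=[None, 784], output_shape=[None, 784],
          convolutional=True, variational=True, softmax=False)
opt = tf.train.AdamOptimizer(1e-4).minimize(net['cost_vae'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(32, 784).astype(np.float32)  # stand-in data
    _, c = sess.run([opt, net['cost_vae']], feed_dict={
        net['x']: batch,
        net['t']: batch,           # reconstruct the input itself
        net['train']: True,
        net['keep_prob']: 1.0,     # unused here since dropout=False
        net['corrupt_rec']: 0.0})  # unused here since denoising=False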
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, this refers to the total number of output
        filters to create for each layer, with each layer's number of output
        filters as a list.
        If convolutional=False, then this refers to the total number of
        neurons for each layer in a fully connected network.
    filter_sizes : list, optional
        Only applied when convolutional=True. This refers to the ksize
        (height and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True. This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding. After the variational embedding, another fully
        connected layer is created with the same size prior to decoding.
        Set to 0 to not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True. This refers to the number of
        latent Gaussians to sample for creating the innermost encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout. If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary. 1.0 means no
        dropout is used. 0.0 means every connection is dropped. Sensible
        values are between 0.5-0.8.
    denoising : bool, optional
        Whether or not to apply denoising. If using denoising, you must feed
        a value for 'corrupt_prob', as returned in the dictionary. 0.0 means
        no corruption is used. 1.0 means every feature is corrupted.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully
        connected network will be created. This affects the n_filters
        parameter's meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer. This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer. The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'z': Innermost encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob +
          x * (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)
            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened
            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))
            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))
            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z
            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)
            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([
                        tf.shape(current_input)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input

    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()
    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma -
            tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
def build_net(graph, training=True, validation=False):
    """Helper for creating a 2D convolution model.

    Parameters
    ----------
    graph : tf.Graph
        default graph to build model
    training : bool, optional
        if true, use training dataset
    validation : bool, optional
        if true, use validation dataset

    Returns
    -------
    batch : list
        list of images
    batch_labels : list
        list of labels for images
    batch_image_paths : list
        list of paths to image files
    init : tf.group
        initializer functions
    x : input image
    y : labels
    phase_train : tf.bool
        is training
    keep_prob : tf.float32
        keep probability for conv2d layers
    keep_prob_fc1 : tf.float32
        keep probability for fully connected layer
    learning_rate : tf.float32
        learning rate
    h : output of sigmoid
    loss : loss
    optimizer : optimizer
    saver : tf.train.Saver
    """
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None] + resize_shape, 'x')
        # TODO: use len(labels_map)
        y = tf.placeholder(tf.int32, [None, 17], 'y')
        phase_train = tf.placeholder(tf.bool, name='phase_train')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        keep_prob_fc1 = tf.placeholder(tf.float32, name='keep_prob_fc1')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Create Input Pipeline for Train, Validation and Test Sets
        if training:
            batch, batch_labels, batch_image_paths = dsutils.create_input_pipeline(
                image_paths=image_paths[:index_split_train_val],
                labels=labels_onehot_list[:index_split_train_val],
                batch_size=batch_size,
                n_epochs=n_epochs,
                shape=input_shape,
                crop_factor=resize_factor,
                training=training,
                randomize=True)
        elif validation:
            batch, batch_labels, batch_image_paths = dsutils.create_input_pipeline(
                image_paths=image_paths[index_split_train_val:],
                labels=labels_onehot_list[index_split_train_val:],
                batch_size=batch_size,
                # only one epoch for test output
                n_epochs=1,
                shape=input_shape,
                crop_factor=resize_factor,
                training=training)
        else:
            batch, batch_labels, batch_image_paths = dsutils.create_input_pipeline(
                image_paths=test_image_paths,
                labels=test_onehot_list,
                batch_size=batch_size,
                # only one epoch for test output
                n_epochs=1,
                shape=input_shape,
                crop_factor=resize_factor,
                training=training)

        Ws = []
        current_input = x
        for layer_i, n_output in enumerate(n_filters):
            with tf.variable_scope('layer{}'.format(layer_i)):
                # 2D Convolutional Layer with batch normalization and relu
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
                h = tf.layers.batch_normalization(h, training=phase_train)
                h = tf.nn.relu(h, 'relu' + str(layer_i))
                # Apply Max Pooling Every 2nd Layer
                if layer_i % 2 == 0:
                    h = tf.nn.max_pool(value=h,
                                       ksize=[1, 2, 2, 1],
                                       strides=[1, 2, 2, 1],
                                       padding='SAME')
                # Apply Dropout Every 2nd Layer
                if layer_i % 2 == 0:
                    h = tf.nn.dropout(h, keep_prob)
                Ws.append(W)
                current_input = h

        h = utils.linear(current_input, fc_size, name='fc_t')[0]
        h = tf.layers.batch_normalization(h, training=phase_train)
        h = tf.nn.relu(h, name='fc_t/relu')
        h = tf.nn.dropout(h, keep_prob_fc1)
        logits = utils.linear(h, len(labels_map), name='fc_t2')[0]
        h = tf.nn.sigmoid(logits, 'fc_t2')

        # must be the same type as logits
        y_float = tf.cast(y, tf.float32)
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=y_float)
        loss = tf.reduce_mean(cross_entropy)

        if training:
            # update moving_mean and moving_variance so they will be
            # available at inference time
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate).minimize(loss)
        else:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(loss)

        saver = tf.train.Saver()
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
    return (batch, batch_labels, batch_image_paths, init, x, y, phase_train,
            keep_prob, keep_prob_fc1, learning_rate, h, loss, optimizer,
            saver)
def encoder(x,
            n_hidden=None,
            dimensions=[],
            filter_sizes=[],
            convolutional=False,
            activation=tf.nn.relu,
            output_activation=tf.nn.sigmoid):
    """Summary

    Parameters
    ----------
    x : TYPE
        Description
    n_hidden : None, optional
        Description
    dimensions : list, optional
        Description
    filter_sizes : list, optional
        Description
    convolutional : bool, optional
        Description
    activation : TYPE, optional
        Description
    output_activation : TYPE, optional
        Description

    Returns
    -------
    name : TYPE
        Description
    """
    if convolutional:
        x_tensor = utils.to_tensor(x)
    else:
        x_tensor = tf.reshape(tensor=x, shape=[-1, dimensions[0]])
        dimensions = dimensions[1:]
    current_input = x_tensor

    Ws = []
    hs = []
    shapes = []
    for layer_i, n_output in enumerate(dimensions):
        with tf.variable_scope(str(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i],
                                    padding='SAME')
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(h)
            Ws.append(W)
            hs.append(h)
        current_input = h

    shapes.append(h.get_shape().as_list())

    with tf.variable_scope('flatten'):
        flattened = utils.flatten(current_input)

    with tf.variable_scope('hidden'):
        if n_hidden:
            h, W = utils.linear(flattened, n_hidden, name='linear')
            h = activation(h)
        else:
            h = flattened

    return {'z': h, 'Ws': Ws, 'hs': hs, 'shapes': shapes}
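# --- Hedged example (illustrative sizes): a small fully connected encoder
# over flattened 28x28 images. In the non-convolutional path, dimensions[0]
# is the input size and the remaining entries are the layer widths:
import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 784], 'x')
enc = encoder(x, n_hidden=64, dimensions=[784, 256, 128], convolutional=False)
z = enc['z']  # [None, 64]; enc['Ws'] and enc['hs'] hold per-layer weights/activations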
def fcn_24_detect(threshold, dropout=False, activation=tf.nn.relu):
    imgs = tf.placeholder(tf.float32, [None, 24, 24, 3])
    labels = tf.placeholder(tf.float32, [None, 1])
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    net_12 = fcn_12_detect(0.16, activation=activation)
    with tf.variable_scope('net_24'):
        conv1, _ = utils.conv2d(x=imgs, n_output=64, k_w=5, k_h=5,
                                d_w=1, d_h=1, name="conv1")
        conv1 = activation(conv1)
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding="SAME",
                               name="pool1")
        ip1, W1 = utils.conv2d(x=pool1, n_output=128, k_w=12, k_h=12,
                               d_w=1, d_h=1, padding="VALID", name="ip1")
        ip1 = activation(ip1)
        net_12_ip1 = net_12['features']
        concat = tf.concat([ip1, net_12_ip1], 3)
        if dropout:
            concat = tf.nn.dropout(concat, keep_prob)
        ip2, W2 = utils.conv2d(x=concat, n_output=1, k_w=1, k_h=1,
                               d_w=1, d_h=1, name="ip2")

    pred = tf.nn.sigmoid(utils.flatten(ip2))
    target = utils.flatten(labels)

    regularizer = 8e-3 * (tf.nn.l2_loss(W1) + 100 * tf.nn.l2_loss(W2))
    # mean binary cross-entropy plus l2 weight decay
    loss = tf.reduce_mean(
        tf.div(
            tf.add(
                -tf.reduce_sum(target * tf.log(pred + 1e-9), 1),
                -tf.reduce_sum((1 - target) * tf.log(1 - pred + 1e-9), 1)),
            2)) + regularizer
    cost = tf.reduce_mean(loss)

    thresholding_24 = tf.cast(tf.greater(pred, threshold), "float")
    recall_24 = tf.reduce_sum(
        tf.cast(
            tf.logical_and(tf.equal(thresholding_24, tf.constant([1.0])),
                           tf.equal(target, tf.constant([1.0]))),
            "float")) / tf.reduce_sum(target)

    correct_prediction = tf.equal(
        tf.cast(tf.greater(pred, threshold), tf.int32),
        tf.cast(target, tf.int32))
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return {
        'net_12': net_12,
        'imgs': imgs,
        'labels': labels,
        'imgs_12': net_12['imgs'],
        'labels_12': net_12['labels'],
        'keep_prob': keep_prob,
        'keep_prob_12': net_12['keep_prob'],
        'cost': cost,
        'pred': pred,
        'accuracy': acc,
        'features': concat,
        'recall': recall_24,
        'thresholding': thresholding_24
    }
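# --- Hedged inference sketch for this cascade stage. Assumes `fcn_12_detect`
# is defined alongside, as the call above implies; `windows_24`/`windows_12`
# are hypothetical stand-ins for the same crops at both scales:
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
net = fcn_24_detect(threshold=0.5)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    windows_24 = np.zeros((8, 24, 24, 3), np.float32)
    windows_12 = np.zeros((8, 12, 12, 3), np.float32)
    scores = sess.run(net['pred'], feed_dict={
        net['imgs']: windows_24,
        net['imgs_12']: windows_12,
        net['keep_prob']: 1.0,      # no dropout at inference
        net['keep_prob_12']: 1.0})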
def fcn_48_cal(dropout=False, activation=tf.nn.relu):
    imgs = tf.placeholder(tf.float32, [None, 48, 48, 3])
    labels = tf.placeholder(tf.float32, [None])
    # keep probability for the optional dropout below
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    with tf.variable_scope('cal_48'):
        conv1, _ = utils.conv2d(x=imgs, n_output=64, k_w=5, k_h=5,
                                d_w=1, d_h=1, name="conv1")
        conv1 = activation(conv1)
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1], padding="SAME",
                               name="pool1")
        conv2, _ = utils.conv2d(x=pool1, n_output=64, k_w=5, k_h=5,
                                d_w=1, d_h=1, name="conv2")
        ip1, W1 = utils.conv2d(x=conv2, n_output=256, k_w=24, k_h=24,
                               d_w=1, d_h=1, padding="VALID", name="ip1")
        ip1 = activation(ip1)
        if dropout:
            ip1 = tf.nn.dropout(ip1, keep_prob)
        ip2, W2 = utils.conv2d(x=ip1, n_output=45, k_w=1, k_h=1,
                               d_w=1, d_h=1, name="ip2")

    pred = utils.flatten(ip2)
    # target = utils.flatten(labels)
    # label_shape = labels.get_shape().as_list()
    # target = tf.reshape(labels, [label_shape[0]])
    target = labels

    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred, labels=tf.cast(target, tf.int64)))
    regularizer = 8e-3 * (tf.nn.l2_loss(W1) + 100 * tf.nn.l2_loss(W2))
    loss = cross_entropy + regularizer

    correct_prediction = tf.equal(tf.argmax(pred, 1),
                                  tf.cast(target, tf.int64))
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return {
        'cost': loss,
        'pred': pred,
        'accuracy': acc,
        'target': target,
        'imgs': imgs,
        'labels': labels,
        'keep_prob': keep_prob
    }
# Create the output to the network. This is our one hot encoding of 2
# possible values
Y = tf.placeholder(name='Y', shape=[None, 2], dtype=tf.float32)

# TODO: Explore different numbers of layers, and sizes of the network
n_filters = [16, 16, 16, 16]

# Now let's loop over our n_filters and create the deep convolutional
# neural network
H = X
for layer_i, n_filters_i in enumerate(n_filters):
    # Let's use the helper function to create our connection to the next layer:
    # TODO: explore changing the parameters here:
    H, W = utils.conv2d(H, n_filters_i,
                        k_h=16, k_w=16, d_h=2, d_w=2,
                        name=str(layer_i))
    # And use a nonlinearity
    # TODO: explore changing the activation here:
    H = tf.nn.relu(H)
    # Just to check what's happening:
    print(H.get_shape().as_list())

# Connect the last convolutional layer to a fully connected network
fc, W = utils.linear(H, 100, name="fc1", activation=tf.nn.relu)

# And another fully connected layer, now with just 2 outputs, the number of
# outputs that our one hot encoding has
def main(winLSecs):
    data_dir = "/Users/alfonso/matlab/IndirectAcquisition/keras/dataforMarius/export"
    files = [os.path.join(data_dir, file_i)
             for file_i in os.listdir(data_dir)
             if file_i.endswith('.mat')]
    matlabStruct = umatlab.loadmat(files[1]).get('data')
    energyBand = matlabStruct.get('residualEnergyBand')
    energyBand = (energyBand / 120) + 1  # normalize [0-1]
    totalSecs = matlabStruct.get('waveIn').shape[0] / matlabStruct.get('audioSR')
    # energyBand.shape[1] / totalSecs: around 240 Hz, i.e. about 5 ms at 44100 Hz
    energyBands_sr = 240
    controlNames = matlabStruct.get('controlNames')
    controlData = matlabStruct.get('controlData')
    indexVel = [i for i in range(controlNames.shape[0])
                if controlNames[i] == 'abs(velocity)'][0]
    indexForce = [i for i in range(controlNames.shape[0])
                  if controlNames[i] == 'forceN'][0]
    velocity = controlData[indexVel, :] / 150
    force = (controlData[indexForce, :] + 0.2) / 2
    # indexString = [i for i in range(controlNames.shape[0])
    #                if controlNames[i] == 'string'][0]
    # string = controlData[indexString, :]
    # pitch = controlData[6, :] / 1500

    # We want winLSecs seconds of audio in our window
    # winLSecs = 0.05
    windowSize = int((winLSecs * energyBands_sr) // 2 * 2)
    # And we'll move our window by windowSize/2
    hopSize = windowSize // 2
    n_hops = (energyBand.shape[1]) // hopSize
    print('windowSize', windowSize)

    # ------------- prepare dataset
    Xs = []
    ys = []

    # Let's start with the music files
    for filename in files:
        # print(filename)
        matlabStruct = umatlab.loadmat(filename).get('data')
        energyBand = (matlabStruct.get('energyBand') / 120) + 1
        # energyBand = (matlabStruct.get('residualEnergyBand') / 120) + 1
        controlData = matlabStruct.get('controlData')
        controlNames = matlabStruct.get('controlNames')
        target = controlData[indexVel, :] / 150
        # target = (controlData[indexForce, :] + 0.2) / 2
        n_hops = (energyBand.shape[1]) // hopSize
        # print(n_frames_per_second, n_frames, frame_hops, n_hops)
        n_hops = int(n_hops) - 1
        for hop_i in range(n_hops):
            # Creating our sliding window
            frames = energyBand[:, (hop_i * hopSize):(hop_i * hopSize + windowSize)]
            Xs.append(frames[..., np.newaxis])
            # And then store the velocity
            ys.append(target[(hop_i * hopSize):(hop_i * hopSize + windowSize)])

    Xs = np.array(Xs)
    ys = np.array(ys)
    print(Xs.shape, ys.shape)
    ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], n_classes=0)

    # ---------- create ConvNet
    tf.reset_default_graph()

    # Create the input to the network. This is a 4-dimensional tensor
    # (batch_size, height (freq), width (time), channels).
    # Recall that we are using sliding windows of our magnitudes (TODO):
    X = tf.placeholder(name='X',
                       shape=(None, Xs.shape[1], Xs.shape[2], Xs.shape[3]),
                       dtype=tf.float32)

    # Create the output to the network: one regression target per frame in
    # the window (TODO):
    Y = tf.placeholder(name='Y', shape=(None, windowSize), dtype=tf.float32)

    # TODO: Explore different numbers of layers, and sizes of the network
    n_filters = [9, 9, 9]

    # Now let's loop over our n_filters and create the deep convolutional
    # neural network
    H = X
    for layer_i, n_filters_i in enumerate(n_filters):
        # Let's use the helper function to create our connection to the next layer:
        # TODO: explore changing the parameters here:
        H, W = utils.conv2d(
            H, n_filters_i, k_h=2, k_w=2, d_h=2, d_w=2, name=str(layer_i))
        # And use a nonlinearity
        # TODO: explore changing the activation here:
        # H = tf.nn.relu(H)
        H = tf.nn.softplus(H)
        # H is a 4D tensor [batch, height, width, channels]
        # H = tf.nn.max_pool(value=H, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
        #                    padding='SAME', data_format='NHWC', name=None)
        # Just to check what's happening:
        print(H.get_shape().as_list())

    # Connect the last convolutional layer to a fully connected network
    fc1, W = utils.linear(H, n_output=100, name="fcn1", activation=tf.nn.relu)
    # fc2, W = utils.linear(fc1, n_output=50, name="fcn2", activation=tf.nn.relu)
    # fc3, W = utils.linear(fc2, n_output=10, name="fcn3", activation=tf.nn.relu)

    # And another fully connected layer, now with windowSize outputs, one
    # per frame in the window
    Y_pred, W = utils.linear(fc1, n_output=windowSize, name="pred",
                             activation=tf.nn.sigmoid)

    loss = tf.squared_difference(Y_pred, Y)
    cost = tf.reduce_mean(tf.reduce_sum(loss, 1))
    learning_rate = 0.001
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    # predicted_y = tf.argmax(Y_pred, 1)
    # actual_y = tf.argmax(Y, 1)
    # correct_prediction = tf.equal(predicted_y, actual_y)
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # ----- TRAIN ConvNet
    # Explore these parameters: (TODO)
    batch_size = 400

    # Create a session and init!
    sess = tf.Session()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    # Now iterate over our dataset n_epochs times
    n_epochs = 100
    for epoch_i in range(n_epochs):
        print('Epoch: ', epoch_i)
        # Train
        this_cost = 0
        its = 0
        # Do our mini batches:
        for Xs_i, ys_i in ds.train.next_batch(batch_size):
            # Note here: we are running the optimizer so
            # that the network parameters train!
            this_cost += sess.run([cost, optimizer],
                                  feed_dict={X: Xs_i, Y: ys_i})[0]
            its += 1
            # print(this_cost / its)
        print('Training cost: ', this_cost / its)

        # Validation (see how the network does on unseen data).
        this_cost = 0
        its = 0
        # Do our mini batches:
        for Xs_i, ys_i in ds.valid.next_batch(batch_size):
            # Note here: we are NOT running the optimizer!
            # we only measure the cost!
            this_cost += sess.run(cost, feed_dict={X: Xs_i, Y: ys_i})  # , keep_prob: 1.0
            its += 1
        print('Validation cost: ', this_cost / its)

    # # ----- plot convolutional kernels learned
    # g = tf.get_default_graph()
    # for layer_i in range(len(n_filters)):
    #     W = sess.run(g.get_tensor_by_name('{}/W:0'.format(layer_i)))
    #     plt.figure(figsize=(5, 5))
    #     plt.imshow(utils.montage_filters(W))
    #     plt.title('Layer {}\'s Learned Convolution Kernels'.format(layer_i))

    modelFileName = './models/velocity_wL' + str(winLSecs) + '_' + \
        datetime.datetime.now().strftime("%Y%m%d_%H%M") + '.chkp'
    saver.save(sess, modelFileName)
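# --- Hedged follow-up sketch (assumes the graph built in main() above is
# still alive in the same process): restore the saved checkpoint and run
# the network on new windows.
def predict(sess, saver, modelFileName, X, Y_pred, windows):
    """Restore the checkpoint saved by main() and predict velocities."""
    saver.restore(sess, modelFileName)
    return sess.run(Y_pred, feed_dict={X: windows})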
def deepID(input_shape=[None, 39, 39, 1],
           n_filters=[20, 40, 60, 80],
           filter_sizes=[4, 3, 3, 2],
           activation=tf.nn.relu,
           dropout=False):
    """DeepID.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for 39x39 grayscale face
        crops: [None, 39, 39, 1].
    n_filters : list, optional
        Number of output filters to create for each convolutional layer,
        as a list.
    filter_sizes : list, optional
        The ksize (height and width) of each convolutional layer.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout. If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary. 1.0 means no
        dropout is used. 0.0 means every connection is dropped. Sensible
        values are between 0.5-0.8.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'y': Target Placeholder
            'pred': Predicted landmarks, reshaped to (-1, 5, 2)
            'keep_prob': Amount to keep when using Dropout
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    y = tf.placeholder(tf.float32, [None, 10], 'y')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x)
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    shapes.append(current_input.get_shape().as_list())
    conv1, W = utils.conv2d(x=x_tensor,
                            n_output=n_filters[0],
                            k_h=filter_sizes[0],
                            k_w=filter_sizes[0],
                            d_w=1,
                            d_h=1,
                            name='conv1')
    Ws.append(W)
    # conv1 = activation(batch_norm(conv1, phase_train, 'bn1'))
    conv1 = activation(conv1)
    pool1 = tf.nn.max_pool(conv1,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool1')

    conv2, W = utils.conv2d(x=pool1,
                            n_output=n_filters[1],
                            k_h=filter_sizes[1],
                            k_w=filter_sizes[1],
                            d_w=1,
                            d_h=1,
                            name='conv2')
    Ws.append(W)
    # conv2 = activation(batch_norm(conv2, phase_train, 'bn2'))
    conv2 = activation(conv2)
    pool2 = tf.nn.max_pool(conv2,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool2')

    conv3, W = utils.conv2d(x=pool2,
                            n_output=n_filters[2],
                            k_h=filter_sizes[2],
                            k_w=filter_sizes[2],
                            d_w=1,
                            d_h=1,
                            name='conv3')
    Ws.append(W)
    # conv3 = activation(batch_norm(conv3, phase_train, 'bn3'))
    conv3 = activation(conv3)
    pool3 = tf.nn.max_pool(conv3,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool3')

    conv4, W = utils.conv2d(x=pool3,
                            n_output=n_filters[3],
                            k_h=filter_sizes[3],
                            k_w=filter_sizes[3],
                            d_w=1,
                            d_h=1,
                            name='conv4')
    Ws.append(W)
    # conv4 = activation(batch_norm(conv4, phase_train, 'bn4'))
    conv4 = activation(conv4)

    pool3_flat = utils.flatten(pool3)
    conv4_flat = utils.flatten(conv4)
    concat = tf.concat([pool3_flat, conv4_flat], 1, name='concat')

    ip1, W = utils.linear(concat, 120, name='ip1')
    Ws.append(W)
    ip1 = activation(ip1)
    if dropout:
        ip1 = tf.nn.dropout(ip1, keep_prob)

    ip2, W = utils.linear(ip1, 10, name='ip2')
    Ws.append(W)
    # ip2 = activation(ip2)

    p_flat = utils.flatten(ip2)
    y_flat = utils.flatten(y)

    regularizers = 5e-4 * (tf.nn.l2_loss(Ws[-1]) + tf.nn.l2_loss(Ws[-2]))
    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(p_flat, y_flat), 1)
    cost = tf.reduce_mean(loss_x) + regularizers
    prediction = tf.reshape(p_flat, (-1, 5, 2))

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'y': y,
        'pred': prediction,
        'keep_prob': keep_prob,
        'train': phase_train
    }
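# --- Hedged training sketch for the landmark model above. The 10-dim target
# packs five (x, y) coordinates, matching the (-1, 5, 2) reshape of 'pred';
# the zero arrays are stand-ins for real crops and annotations:
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
model = deepID()
opt = tf.train.AdamOptimizer(1e-3).minimize(model['cost'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    faces = np.zeros((16, 39, 39, 1), np.float32)
    points = np.zeros((16, 10), np.float32)
    _, c = sess.run([opt, model['cost']], feed_dict={
        model['x']: faces,
        model['y']: points,
        model['keep_prob']: 1.0})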