def _setup_net(placeholder, layers, weights, mean_pixel):
    """Build the network described by ``layers`` on top of ``placeholder``.

    The first four characters of every layer name select what is built:
    ``conv`` (convolution with the stored kernels and bias), ``relu`` or
    ``pool`` (2x2 max pooling).  A name with any other prefix adds no op but
    is still recorded with the current activation.

    Args:
        placeholder: network input; ``mean_pixel`` is subtracted from it first.
        layers: iterable of layer names, in the order they are applied.
        weights: per-layer parameters, indexed in step with ``layers``; a
            conv entry is unpacked as ``weights[i][0][0][0][0]``.
        mean_pixel: value subtracted from the input for normalisation.

    Returns:
        dict mapping each layer name to the activation produced after it.
    """
    # Kept as an augmented assignment on purpose: for an array-like input
    # that supports in-place subtraction this updates the caller's object,
    # exactly like the statement it replaces.
    placeholder -= mean_pixel

    activations = {}
    current = placeholder
    for index, layer_name in enumerate(layers):
        layer_type = layer_name[:4]
        with tf.variable_scope(layer_name):
            if layer_type == 'conv':
                raw_kernels, raw_bias = weights[index][0][0][0][0]
                # matconvnet: [width, height, in_channels, out_channels]
                # tensorflow: [height, width, in_channels, out_channels]
                hwio_kernels = np.transpose(raw_kernels, (1, 0, 2, 3))
                kernel_var = tf_utils.get_variable(
                    hwio_kernels, name=layer_name + "_w")
                bias_var = tf_utils.get_variable(
                    raw_bias.reshape(-1), name=layer_name + "_b")
                current = tf_utils.conv2d(current, kernel_var, bias_var)
            elif layer_type == 'relu':
                current = tf.nn.relu(current, name=layer_name)
                tf_utils.add_activation_summary(
                    current, collections=['train'])
            elif layer_type == 'pool':
                current = tf_utils.max_pool_2x2(current)
        activations[layer_name] = current
    return activations
def inference(image, keep_prob):
    """Build the VGG-initialised fully convolutional segmentation graph.

    The pretrained VGG layers (loaded from ``FLAGS.model_path``) are followed
    by three new convolutions (conv6-conv8) and three transposed convolutions
    that upsample back to the spatial size of ``image``, adding the ``pool4``
    and ``pool3`` activations as skip connections on the way.

    Args:
        image: input image batch; its first three dimensions define the
            output size of the last transposed convolution.
        keep_prob: keep probability passed to ``tf.nn.dropout`` after relu6
            and relu7.

    Returns:
        A pair ``(prediction, logits)``: ``logits`` is ``conv_t3``, whose last
        axis has ``NUM_OF_CLASSES`` entries, and ``prediction`` is the argmax
        over that axis with a trailing axis of size one added back.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_path)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer, "pool5")

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6, name="conv6")
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        # b7 is a bias: create it with bias_variable like b6 and b8 rather
        # than with the weight initialiser.
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7, name="conv7")
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSES], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSES], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8, name="conv8")

        # Now upscale to the actual image size.
        # image_net is indexed like a mapping everywhere in this function, so
        # the skip-connection lookups use [...] as well (calling it raised a
        # TypeError).
        pool4 = image_net["pool4"]
        deconv_shape1 = pool4.get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSES], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, "conv_t1", output_shape=tf.shape(pool4))
        fuse_1 = tf.add(conv_t1, pool4, name="fuse_1")

        pool3 = image_net["pool3"]
        deconv_shape2 = pool3.get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, "conv_t2", output_shape=tf.shape(pool3))
        fuse_2 = tf.add(conv_t2, pool3, name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSES])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSES, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSES], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, "conv_t3",
            output_shape=deconv_shape3, stride=8)

        # deconv_shape3 puts the NUM_OF_CLASSES scores on axis 3, so that is
        # the axis to reduce over; axis 2 would pick a winner along an image
        # dimension instead of a class.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def GEDDnet(face, left_eye, right_eye, keep_prob, is_train, subj_id, vgg_path,
            num_subj=15,
            rf=[[2, 2], [3, 3], [5, 5], [11, 11]],
            num_face=[64, 128, 64, 64, 128, 256, 64],
            r=[[2, 2], [3, 3], [4, 5], [5, 11]],
            num_eye=[64, 128, 64, 64, 128, 256],
            num_comb=[0, 256]):
    """Build the three-branch (face, left eye, right eye) network.

    Each input goes through the same four convolutions loaded from
    ``vgg_path``, then through a branch-specific stack of one 1x1 convolution
    and four dilated convolutions followed by fully connected layers.  The two
    eye branches share their convolution weights and batch-norm statistics
    but have separate fully connected layers.  The three feature vectors are
    concatenated and mapped to a 2-value output, to which a per-subject
    offset looked up through ``subj_id`` is added.

    ``conv2d``, ``dilated2d``, ``max_pool_2x2``, ``weight_variable`` and
    ``bias_variable`` are helpers defined outside this function.

    Args:
        face, left_eye, right_eye: input image batches.  The reshapes below
            require the face branch to end with ``6 * 6 * num_face[4]``
            values per example and each eye branch with
            ``4 * 6 * num_eye[4]``.
        keep_prob: keep probability for the dropout layers.
        is_train: forwarded as ``training`` to every batch-norm layer.
        subj_id: matrix multiplied with a ``[2 * num_subj, 2]`` weight to
            obtain the per-subject offset.
        vgg_path: file loaded with ``np.load``; must provide
            ``conv1_1_W`` ... ``conv2_2_b``.
        num_subj: number of subjects.
        rf: dilation rates of the four dilated face convolutions.
        num_face: channel / unit counts of the face branch.
        r: dilation rates of the four dilated eye convolutions.
        num_eye: channel / unit counts of the eye branches.
        num_comb: ``num_comb[1]`` is the width of the hidden layer of the
            combining head.  ``num_comb[0]`` is ignored: the input width is
            always ``num_face[-1] + 2 * num_eye[-1]``.

    Returns:
        ``(g_hat, t_hat, bias_W_fc, l2_loss)`` where ``t_hat`` is the output
        of the combining head, ``g_hat = t_hat + b_hat`` includes the
        subject offset, ``bias_W_fc`` is the offset weight matrix and
        ``l2_loss`` sums ``tf.nn.l2_loss`` over all weight matrices (the
        transferred VGG weights scaled by 1e-2).
    """
    # The width of the concatenated features is derived here instead of being
    # written back into ``num_comb``: assigning to ``num_comb[0]`` modified
    # the caller's list and the shared default list.  None of the list
    # defaults in the signature is mutated in this function.
    comb_in = num_face[-1] + 2 * num_eye[-1]
    comb_hidden = num_comb[1]

    vgg = np.load(vgg_path)
    with tf.variable_scope("transfer"):
        W_conv1_1 = tf.Variable(vgg['conv1_1_W'])
        b_conv1_1 = tf.Variable(vgg['conv1_1_b'])
        W_conv1_2 = tf.Variable(vgg['conv1_2_W'])
        b_conv1_2 = tf.Variable(vgg['conv1_2_b'])

        W_conv2_1 = tf.Variable(vgg['conv2_1_W'])
        b_conv2_1 = tf.Variable(vgg['conv2_1_b'])
        W_conv2_2 = tf.Variable(vgg['conv2_2_W'])
        b_conv2_2 = tf.Variable(vgg['conv2_2_b'])
    del vgg

    def vgg_stem(image):
        """Apply the four transferred convolutions shared by all inputs."""
        h = tf.nn.relu(conv2d(image, W_conv1_1) + b_conv1_1)
        h = tf.nn.relu(conv2d(h, W_conv1_2) + b_conv1_2)
        h = max_pool_2x2(h)
        h = tf.nn.relu(conv2d(h, W_conv2_1) + b_conv2_1)
        # The stem output is scaled down by a constant factor of 100.
        return tf.nn.relu(conv2d(h, W_conv2_2) + b_conv2_2) / 100.

    def batch_norm(x, name, reuse=None):
        """Batch-renormalisation layer with the settings used everywhere."""
        return tf.layers.batch_normalization(
            x, training=is_train, scale=False, renorm=True,
            name=name, reuse=reuse)

    def dilated_stack(x, w_1x1, b_1x1, layers, rates, prefix, reuse=None):
        """1x1 convolution followed by the dilated convolutions in `layers`.

        Every convolution is followed by ReLU and batch norm; batch-norm
        layers are named ``prefix + layer_name``.
        """
        h = tf.nn.relu(conv2d(x, w_1x1) + b_1x1)
        h = batch_norm(h, prefix + "conv2_3", reuse)
        for idx, (layer_name, w, b) in enumerate(layers):
            h = tf.nn.relu(dilated2d(h, w, rates[idx]) + b)
            h = batch_norm(h, prefix + layer_name, reuse)
        return h

    """ define network """
    # face
    face_h_conv2_2 = vgg_stem(face)

    with tf.variable_scope("face"):
        # All weights are created before any layer is applied so that the
        # variable creation order inside the scope stays fixed.
        face_W_conv2_3 = weight_variable([1, 1, num_face[1], num_face[2]], std=0.125)
        face_b_conv2_3 = bias_variable([num_face[2]], std=0.001)

        face_W_conv3_1 = weight_variable([3, 3, num_face[2], num_face[3]], std=0.06)
        face_b_conv3_1 = bias_variable([num_face[3]], std=0.001)
        face_W_conv3_2 = weight_variable([3, 3, num_face[3], num_face[3]], std=0.06)
        face_b_conv3_2 = bias_variable([num_face[3]], std=0.001)

        face_W_conv4_1 = weight_variable([3, 3, num_face[3], num_face[4]], std=0.08)
        face_b_conv4_1 = bias_variable([num_face[4]], std=0.001)
        face_W_conv4_2 = weight_variable([3, 3, num_face[4], num_face[4]], std=0.07)
        face_b_conv4_2 = bias_variable([num_face[4]], std=0.001)

        face_W_fc1 = weight_variable([6 * 6 * num_face[4], num_face[5]], std=0.035)
        face_b_fc1 = bias_variable([num_face[5]], std=0.001)
        face_W_fc2 = weight_variable([num_face[5], num_face[6]], std=0.1)
        face_b_fc2 = bias_variable([num_face[6]], std=0.001)

        face_h_conv4_2_norm = dilated_stack(
            face_h_conv2_2, face_W_conv2_3, face_b_conv2_3,
            [("conv3_1", face_W_conv3_1, face_b_conv3_1),
             ("conv3_2", face_W_conv3_2, face_b_conv3_2),
             ("conv4_1", face_W_conv4_1, face_b_conv4_1),
             ("conv4_2", face_W_conv4_2, face_b_conv4_2)],
            rf, "f_")

        face_h_pool4_flat = tf.reshape(face_h_conv4_2_norm,
                                       [-1, 6 * 6 * num_face[4]])

        face_h_fc1 = tf.nn.relu(
            tf.matmul(face_h_pool4_flat, face_W_fc1) + face_b_fc1)
        face_h_fc1_norm = batch_norm(face_h_fc1, "f_fc1")
        face_h_fc1_drop = tf.nn.dropout(face_h_fc1_norm, keep_prob)

        face_h_fc2 = tf.nn.relu(
            tf.matmul(face_h_fc1_drop, face_W_fc2) + face_b_fc2)
        face_h_fc2_norm = batch_norm(face_h_fc2, "f_fc2")

    # eyes: both go through the shared stem, left first
    eye1_h_conv2_2 = vgg_stem(left_eye)
    eye2_h_conv2_2 = vgg_stem(right_eye)

    with tf.variable_scope("eye"):
        # Convolution weights are shared by both eyes ...
        eye_W_conv2_3 = weight_variable([1, 1, num_eye[1], num_eye[2]], std=0.125)
        eye_b_conv2_3 = bias_variable([num_eye[2]], std=0.001)

        eye_W_conv3_1 = weight_variable([3, 3, num_eye[2], num_eye[3]], std=0.06)
        eye_b_conv3_1 = bias_variable([num_eye[3]], std=0.001)
        eye_W_conv3_2 = weight_variable([3, 3, num_eye[3], num_eye[3]], std=0.06)
        eye_b_conv3_2 = bias_variable([num_eye[3]], std=0.001)

        eye_W_conv4_1 = weight_variable([3, 3, num_eye[3], num_eye[4]], std=0.06)
        eye_b_conv4_1 = bias_variable([num_eye[4]], std=0.001)
        eye_W_conv4_2 = weight_variable([3, 3, num_eye[4], num_eye[4]], std=0.04)
        eye_b_conv4_2 = bias_variable([num_eye[4]], std=0.001)

        # ... while each eye has its own fully connected layer.
        eye1_W_fc1 = weight_variable([4 * 6 * num_eye[4], num_eye[5]], std=0.026)
        eye1_b_fc1 = bias_variable([num_eye[5]], std=0.001)
        eye2_W_fc1 = weight_variable([4 * 6 * num_eye[4], num_eye[5]], std=0.026)
        eye2_b_fc1 = bias_variable([num_eye[5]], std=0.001)

        eye_layers = [("conv3_1", eye_W_conv3_1, eye_b_conv3_1),
                      ("conv3_2", eye_W_conv3_2, eye_b_conv3_2),
                      ("conv4_1", eye_W_conv4_1, eye_b_conv4_1),
                      ("conv4_2", eye_W_conv4_2, eye_b_conv4_2)]

        # left eye: creates the e_* batch-norm layers
        eye1_h_conv4_2_norm = dilated_stack(
            eye1_h_conv2_2, eye_W_conv2_3, eye_b_conv2_3,
            eye_layers, r, "e_")
        eye1_h_pool4_flat = tf.reshape(eye1_h_conv4_2_norm,
                                       [-1, 4 * 6 * num_eye[4]])
        eye1_h_fc1 = tf.nn.relu(
            tf.matmul(eye1_h_pool4_flat, eye1_W_fc1) + eye1_b_fc1)
        eye1_h_fc1_norm = batch_norm(eye1_h_fc1, "e1_fc1")

        # right eye: reuses the e_* batch-norm layers of the left eye
        eye2_h_conv4_2_norm = dilated_stack(
            eye2_h_conv2_2, eye_W_conv2_3, eye_b_conv2_3,
            eye_layers, r, "e_", reuse=True)
        eye2_h_pool4_flat = tf.reshape(eye2_h_conv4_2_norm,
                                       [-1, 4 * 6 * num_eye[4]])
        eye2_h_fc1 = tf.nn.relu(
            tf.matmul(eye2_h_pool4_flat, eye2_W_fc1) + eye2_b_fc1)
        eye2_h_fc1_norm = batch_norm(eye2_h_fc1, "e2_fc1")

    # combine both eyes and face
    with tf.variable_scope("combine"):
        cls1_W_fc2 = weight_variable([comb_in, comb_hidden], std=0.07)
        cls1_b_fc2 = bias_variable([comb_hidden], std=0.001)

        cls1_W_fc3 = weight_variable([comb_hidden, 2], std=0.125)
        cls1_b_fc3 = bias_variable([2], std=0.001)

        cls1_h_fc1_norm = tf.concat(
            [face_h_fc2_norm, eye1_h_fc1_norm, eye2_h_fc1_norm], axis=1)
        cls1_h_fc1_drop = tf.nn.dropout(cls1_h_fc1_norm, keep_prob)
        cls1_h_fc2 = tf.nn.relu(
            tf.matmul(cls1_h_fc1_drop, cls1_W_fc2) + cls1_b_fc2)
        cls1_h_fc2_norm = batch_norm(cls1_h_fc2, "c_fc2")
        cls1_h_fc2_drop = tf.nn.dropout(cls1_h_fc2_norm, keep_prob)

        t_hat = tf.matmul(cls1_h_fc2_drop, cls1_W_fc3) + cls1_b_fc3

    """ bias learning from subject id """
    with tf.variable_scope("bias"):
        bias_W_fc = weight_variable([2 * num_subj, 2], std=0.125)

    b_hat = tf.matmul(subj_id, bias_W_fc)

    g_hat = t_hat + b_hat

    # Transferred VGG weights are regularised 100x more weakly than the rest.
    l2_loss = (1e-2 * tf.nn.l2_loss(W_conv1_1) +
               1e-2 * tf.nn.l2_loss(W_conv1_2) +
               1e-2 * tf.nn.l2_loss(W_conv2_1) +
               1e-2 * tf.nn.l2_loss(W_conv2_2) +
               tf.nn.l2_loss(face_W_conv2_3) +
               tf.nn.l2_loss(face_W_conv3_1) +
               tf.nn.l2_loss(face_W_conv3_2) +
               tf.nn.l2_loss(face_W_conv4_1) +
               tf.nn.l2_loss(face_W_conv4_2) +
               tf.nn.l2_loss(face_W_fc1) +
               tf.nn.l2_loss(face_W_fc2) +
               tf.nn.l2_loss(eye_W_conv2_3) +
               tf.nn.l2_loss(eye_W_conv3_1) +
               tf.nn.l2_loss(eye_W_conv3_2) +
               tf.nn.l2_loss(eye_W_conv4_1) +
               tf.nn.l2_loss(eye_W_conv4_2) +
               tf.nn.l2_loss(eye1_W_fc1) +
               tf.nn.l2_loss(eye2_W_fc1) +
               tf.nn.l2_loss(cls1_W_fc2) +
               tf.nn.l2_loss(cls1_W_fc3))

    return g_hat, t_hat, bias_W_fc, l2_loss
def polygon_regressor(x_image, input_res, input_channels, encoding_length,
                      output_vertex_count, weight_decay):
    """Build the graph of a convolutional net that regresses polygon vertices.

    Three conv + 2x2 max-pool stages (16, 32 and 64 output channels, kernel
    size 3) feed a fully connected encoder (1024 -> encoding_length, with
    dropout in between) and a fully connected decoder
    (encoding_length -> 512 -> output_vertex_count * 2, sigmoid output).

    Args:
        x_image: input tensor of shape
            (N_examples, input_res, input_res, input_channels)
        input_res: image resolution
        input_channels: image number of channels
        encoding_length: number of neurons used in the bottleneck to encode
            the input polygon
        output_vertex_count: number of vertex of the polygon output
        weight_decay: Weight decay coefficient

    Returns:
        y: tensor of shape (N_examples, output_vertex_count, 2), with vertex
            coordinates
        keep_prob: scalar placeholder for the probability of dropout.
    """
    # --- Feature extractor: conv1/pool1 ... conv3/pool3 --- #
    features = x_image
    in_channels = input_channels
    for stage, out_channels in enumerate((16, 32, 64), start=1):
        with tf.name_scope('conv%d' % stage):
            features = tf_utils.complete_conv2d(
                features, 3, in_channels, out_channels, weight_decay)
        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool%d' % stage):
            features = tf_utils.max_pool_2x2(features)
        in_channels = out_channels

    # Three 2X poolings shrink each spatial side by a factor of 8.
    reduction_factor = 8  # Adjust according to previous layers
    reduced_side = int(input_res / reduction_factor)
    flat_size = reduced_side * int(input_res / reduction_factor) * 64

    with tf.name_scope('flatten'):
        flat_features = tf.reshape(features, [-1, flat_size])

    # --- Encoder --- #
    # Map the flattened feature maps to 1024 features.
    with tf.name_scope('fc1'):
        hidden = tf_utils.complete_fc(
            flat_features, flat_size, 1024, weight_decay, tf.nn.relu)

    # Dropout - controls the complexity of the model, prevents co-adaptation
    # of features.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        hidden_drop = tf.nn.dropout(hidden, keep_prob)

    # Map the 1024 features to encoding_length features.
    with tf.name_scope('fc2'):
        encoding = tf_utils.complete_fc(
            hidden_drop, 1024, encoding_length, weight_decay, tf.nn.relu)

    # --- Decoder --- #
    # Map the encoding_length features to 512 features.
    with tf.name_scope('fc3'):
        decoded = tf_utils.complete_fc(
            encoding, encoding_length, 512, weight_decay, tf.nn.relu)

    # Map the 512 features to the output_vertex_count * 2 output coordinates.
    with tf.name_scope('fc4'):
        y_flat = tf_utils.complete_fc(
            decoded, 512, output_vertex_count * 2, weight_decay,
            tf.nn.sigmoid)

    with tf.name_scope('reshape_output'):
        y_coords = tf.reshape(y_flat, [-1, output_vertex_count, 2])

    return y_coords, keep_prob
def main():
    """Train a small convolutional classifier on MNIST and print accuracies.

    The model is two 5x5 convolution + max-pool layers, one fully connected
    layer with dropout and a 10-way readout, trained with Adam on the softmax
    cross entropy.  The accuracy on the current training batch is printed
    every 100 steps and the test-set accuracy once training has finished.
    """
    # Load the mnist dataset. Class stores the train, validation and testing
    # sets as numpy arrays.
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Create a tensorflow session.  InteractiveSession installs itself as the
    # default session, which `.run()` / `.eval()` below rely on.
    sess = tf.InteractiveSession()
    try:
        # Create the computational graph. Start with creating placeholders
        # for the input and output data.
        # Input placeholder.
        input_placeholder = tf.placeholder(tf.float32, shape=[None, 784])
        # Output placeholder.
        labeled_data = tf.placeholder(tf.float32, shape=[None, 10])

        # Reshape input to a 4D tensor of [-1, width, height, channels].
        # -1 lets the batch dimension follow the size of the input.
        image_shape = [-1, 28, 28, 1]
        input_image = tf.reshape(input_placeholder, image_shape)

        # Two convolutional layers followed by one fully connected layer.
        # Layer 1 computes 32 features for each 5x5 patch.
        conv1_weights = tf_utils.weight_variable([5, 5, 1, 32])
        conv1_bias = tf_utils.bias_variable([32])
        # Apply ReLU activation and max pool.
        conv1_act = tf.nn.relu(
            tf_utils.conv2d(input_image, conv1_weights) + conv1_bias)
        conv1_pool = tf_utils.max_pool_2x2(conv1_act)

        # Layer 2 computes 64 features of 5x5 patch.
        conv2_weights = tf_utils.weight_variable([5, 5, 32, 64])
        conv2_bias = tf_utils.bias_variable([64])
        # Apply ReLU activation and max pool.
        conv2_act = tf.nn.relu(
            tf_utils.conv2d(conv1_pool, conv2_weights) + conv2_bias)
        conv2_pool = tf_utils.max_pool_2x2(conv2_act)

        # Add fully connected layers.
        fc1_weights = tf_utils.weight_variable([7 * 7 * 64, 1024])
        fc1_bias = tf_utils.bias_variable([1024])
        # Apply Relu activation to flattened conv2d pool layer.
        conv2_flat = tf.reshape(conv2_pool, [-1, 7 * 7 * 64])
        fc1_act = tf.nn.relu(tf.matmul(conv2_flat, fc1_weights) + fc1_bias)

        # Add dropout before the readout layer.
        keep_prob = tf.placeholder(tf.float32)
        dropout = tf.nn.dropout(fc1_act, keep_prob)

        # Add the readout layer for the 10 classes.
        readout_weights = tf_utils.weight_variable([1024, 10])
        readout_bias = tf_utils.bias_variable([10])
        readout_act = tf.matmul(dropout, readout_weights) + readout_bias

        # Cross entropy loss between the output labels and the model.
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labeled_data, logits=readout_act))

        # Define the training step with a learning rate and our cross
        # entropy loss.
        learning_rate = 1e-4
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            cross_entropy)

        # Initialize all variables.
        sess.run(tf.global_variables_initializer())

        # Model evaluation: fraction of examples whose predicted class
        # matches the label.
        predictions = tf.equal(
            tf.argmax(labeled_data, 1), tf.argmax(readout_act, 1))
        accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))

        # Run the training for n steps.
        steps = 10000
        batch_size = 50
        # `range` and the parenthesised single-argument `print` calls below
        # work on both Python 2 and Python 3; the format strings reproduce
        # the spacing of the former print statements.
        for step in range(steps):
            # Sample a batch from the mnist dataset.
            batch = mnist.train.next_batch(batch_size)
            # Run one training step on the sampled batch.
            train_step.run(feed_dict={
                input_placeholder: batch[0],
                labeled_data: batch[1],
                keep_prob: 0.5
            })
            # Print the training accuracy after every 100 steps; dropout is
            # disabled (keep_prob 1.0) for evaluation.
            if step % 100 == 0:
                train_accuracy = accuracy.eval(
                    feed_dict={
                        input_placeholder: batch[0],
                        labeled_data: batch[1],
                        keep_prob: 1.0
                    })
                print("Step:  %s  | Train Accuracy:  %s"
                      % (step, train_accuracy))

        test_accuracy = accuracy.eval(
            feed_dict={
                input_placeholder: mnist.test.images,
                labeled_data: mnist.test.labels,
                keep_prob: 1.0
            })
        print("Accuracy:  %s" % (test_accuracy,))
    finally:
        # Release the session's resources even if training fails midway.
        sess.close()
def polygon_encoder_decoder(x_image, input_res, encoding_length,
                            output_vertex_count, weight_decay=None):
    """Build the graph for a deep net for encoding and decoding polygons.

    Three conv + 2x2 max-pool stages (8, 16 and 32 output channels, kernel
    size 5) extract features from a one-channel image; after dropout a
    single fully connected layer compresses them to ``encoding_length``
    values, which ``decode`` (defined elsewhere) turns into vertex
    coordinates.

    Args:
        x_image: input tensor; the first convolution expects one channel
        input_res: image resolution (not read by this function; the flattened
            size is taken from the static shape of the last pooling output)
        encoding_length: number of neurons used in the bottleneck to encode
            the input polygon
        output_vertex_count: number of vertex of the polygon output
        weight_decay: Weight decay coefficient

    Returns:
        y: output of ``decode`` for the encoded features
        keep_prob: scalar placeholder for the probability of dropout.
    """
    channel_plan = ((1, 8), (8, 16), (16, 32))

    with tf.name_scope('Features'):
        features = x_image
        for stage, (in_channels, out_channels) in enumerate(channel_plan,
                                                            start=1):
            with tf.name_scope('conv%d' % stage):
                features = tf_utils.complete_conv2d(
                    features, 5, in_channels, out_channels, weight_decay)
            # Pooling layer - downsamples by 2X.
            with tf.name_scope('pool%d' % stage):
                features = tf_utils.max_pool_2x2(features)

    # Number of values per example once height, width and channels are
    # flattened; requires these three dimensions to be statically known.
    static_shape = features.shape
    flat_size = int(static_shape[1] * static_shape[2] * static_shape[3])

    with tf.name_scope('Encoder'):
        with tf.name_scope('flatten'):
            flat_features = tf.reshape(features, [-1, flat_size])

        # Dropout - controls the complexity of the model, prevents
        # co-adaptation of features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            flat_features_drop = tf.nn.dropout(flat_features, keep_prob)

        with tf.name_scope('fc1'):
            encoding = tf_utils.complete_fc(
                flat_features_drop, flat_size, encoding_length,
                weight_decay, tf.nn.relu)

    y_coords = decode(encoding, encoding_length, output_vertex_count,
                      weight_decay, scope_name="Decode")

    return y_coords, keep_prob
def create_fcn(placeholder, keep_prob, classes):
    """Set up the main conv/deconv network.

    A VGG-19 feature extractor (``create_vgg19``) is followed by three
    convolutions (conv6-conv8; the first two with ReLU and dropout) and three
    transposed convolutions that upsample to the spatial size of
    ``placeholder``; the first two transposed outputs are summed with the
    ``pool4`` and ``pool3`` activations.

    Args:
        placeholder: input image batch.
        keep_prob: keep probability for dropout after relu6 and relu7.
        classes: number of output channels of conv8 and of the final
            transposed convolution.

    Returns:
        ``(prediction, output)``: ``output`` is the result of the last
        transposed convolution and ``prediction`` its argmax over dimension 3
        with that dimension added back with size one.
    """
    with tf.variable_scope('inference'):
        vgg_net = create_vgg19(placeholder)
        output = tf_utils.max_pool_2x2(vgg_net['relu5_4'])

        # --- conv6 .. conv8 --- #
        head_shapes = [[7, 7, 512, 4096],
                       [1, 1, 4096, 4096],
                       [1, 1, 4096, classes]]
        for layer_no, kernel_shape in enumerate(head_shapes, start=6):
            conv_name = 'conv%d' % layer_no
            with tf.variable_scope(conv_name):
                kernel = tf_utils.weight_variable(
                    kernel_shape, name=conv_name + '_w')
                bias = tf_utils.bias_variable(
                    kernel_shape[-1:], name=conv_name + '_b')
                output = tf_utils.conv2d(output, kernel, bias)
            with tf.variable_scope('relu%d' % layer_no):
                # conv8 produces the class scores and gets neither ReLU nor
                # dropout.
                if layer_no < 8:
                    output = tf.nn.relu(output)
                    tf_utils.add_activation_summary(
                        output, collections=['train'])
                    output = tf.nn.dropout(output, keep_prob=keep_prob)

        # --- deconv1 .. deconv3 with skip connections --- #
        pool4 = vgg_net['pool4']
        pool3 = vgg_net['pool3']
        pool4_depth = pool4.get_shape()[3].value
        pool3_depth = pool3.get_shape()[3].value

        target_shapes = [
            tf.shape(pool4),
            tf.shape(pool3),
            tf.stack([
                tf.shape(placeholder)[0],
                tf.shape(placeholder)[1],
                tf.shape(placeholder)[2],
                classes
            ]),
        ]
        kernel_shapes = [[4, 4, pool4_depth, classes],
                         [4, 4, pool3_depth, pool4_depth],
                         [16, 16, classes, pool3_depth]]
        strides = [2, 2, 8]
        # The last stage has no skip connection.
        skips = [pool4, pool3, None]

        stages = zip(kernel_shapes, target_shapes, strides, skips)
        for stage_no, (kernel_shape, target, stride, skip) in enumerate(
                stages, start=1):
            deconv_name = 'deconv%d' % stage_no
            with tf.variable_scope(deconv_name):
                kernel = tf_utils.weight_variable(
                    kernel_shape, name=deconv_name + '_w')
                output = tf_utils.conv2d_transpose(
                    output, kernel, None,
                    output_shape=target, stride=stride)
            with tf.variable_scope('skip%d' % stage_no):
                if skip is not None:
                    output = tf.add(output, skip)

        prediction = tf.argmax(output, dimension=3, name='prediction')

    return tf.expand_dims(prediction, dim=3), output