import tensorflow as tf

import tf_utils  # local helper module providing complete_conv2d, complete_fc, max_pool_2x2 (assumed import path)

SUMMARY = False  # assumed module-level flag: whether complete_conv2d adds TensorBoard summaries


def encode(input_tensor, encoding_length, weight_decay, scope_name="Encode"):
    """Encodes input_tensor into a vector of encoding_length features.

    Args:
        input_tensor (4-D Tensor): (N, H, W, C)
        encoding_length: number of neurons used in the bottleneck encoding
        weight_decay: Weight decay coefficient
        scope_name (str): name scope of the encoder (default: "Encode")

    Returns:
        fc1: tensor of shape (N, encoding_length)
        keep_prob: scalar placeholder for the probability of dropout.
    """
    input_tensor_shape = input_tensor.shape
    input_tensor_channel_count = int(input_tensor_shape[3])
    with tf.name_scope(scope_name):
        with tf.name_scope('conv1'):
            conv1 = tf_utils.complete_conv2d(input_tensor, 1, input_tensor_channel_count, 512, weight_decay)
        with tf.name_scope('conv2'):
            conv2 = tf_utils.complete_conv2d(conv1, 1, 512, 128, weight_decay)

        current_output_shape = conv2.shape
        current_data_dimension = int(current_output_shape[1] * current_output_shape[2] * current_output_shape[3])
        with tf.name_scope('flatten'):
            conv2_flat = tf.reshape(conv2, [-1, current_data_dimension])

        # Dropout - controls the complexity of the model, prevents co-adaptation of
        # features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            # tf.summary.scalar('dropout_keep_probability', keep_prob)
            inception_output_drop = tf.nn.dropout(conv2_flat, keep_prob)

        with tf.name_scope('fc1'):
            fc1 = tf_utils.complete_fc(inception_output_drop, current_data_dimension, encoding_length, weight_decay,
                                       tf.nn.relu)

    return fc1, keep_prob
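# A minimal usage sketch for `encode` (the input shape and hyperparameters
# below are illustrative assumptions, not values from the original code):
#
#   features = tf.placeholder(tf.float32, [None, 8, 8, 256])
#   encoding, keep_prob = encode(features, encoding_length=128, weight_decay=1e-4)
#   # Feed e.g. {keep_prob: 0.5} during training and {keep_prob: 1.0} at test time.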
def upsample_crop_concat(to_upsample, input_to_crop, size=(2, 2), weight_decay=None, name=None):
    """Upsample `to_upsample`, crop `input_to_crop` to the upsampled resolution and concat the two.

    Args:
        to_upsample (4-D Tensor): (N, H, W, C)
        input_to_crop (4-D Tensor): (N, 2*H + padding, 2*W + padding, C2)
        size (tuple): (height_multiplier, width_multiplier) (default: (2, 2))
        weight_decay: Weight decay coefficient (default: None)
        name (str): name postfix of the created operations (default: None)

    Returns:
        output (4-D Tensor): (N, size[0]*H, size[1]*W, 2*C2)
    """
    H, W, _ = to_upsample.get_shape().as_list()[1:]
    _, _, target_C = input_to_crop.get_shape().as_list()[1:]
    H_multi, W_multi = size
    target_H = H * H_multi
    target_W = W * W_multi

    upsample = tf.image.resize_bilinear(to_upsample, (target_H, target_W), name="upsample_{}".format(name))
    upsample = tf_utils.complete_conv2d(upsample, target_C, (3, 3), padding="SAME", bias_init_value=-0.01,
                                        weight_decay=weight_decay, summary=SUMMARY)
    # TODO: initialize upsample with bilinear weights
    # upsample = tf.layers.conv2d_transpose(to_upsample, target_C, kernel_size=2, strides=1, padding="valid",
    #                                       name="deconv{}".format(name))

    crop = tf.image.resize_image_with_crop_or_pad(input_to_crop, target_H, target_W)
    return tf.concat([upsample, crop], axis=-1, name="concat_{}".format(name))
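# Usage sketch for a U-Net-style skip connection (the two tensors below are
# assumed stand-ins for real encoder/decoder outputs, not from the original code):
#
#   decoder_feat = ...  # (N, 16, 16, 64)  coarse decoder features
#   skip_feat = ...     # (N, 34, 34, 32)  matching encoder features, slightly larger
#   merged = upsample_crop_concat(decoder_feat, skip_feat, size=(2, 2),
#                                 weight_decay=1e-4, name="1")
#   # merged has shape (N, 32, 32, 64): 32 upsampled + 32 cropped channels.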
def conv_conv_pool(input_, n_filters, name="", pool=True, activation=tf.nn.elu, weight_decay=None,
                   dropout_keep_prob=None):
    """{Conv -> BN -> activation}x2 -> {Pool, optional}

    Args:
        input_ (4-D Tensor): (batch_size, H, W, C)
        n_filters (list): number of filters [int, int]
        name (str): name postfix
        pool (bool): If True, MaxPool2D after the convolutions
        activation: Activation function (default: tf.nn.elu)
        weight_decay: Weight decay rate
        dropout_keep_prob: Dropout keep probability (currently unused)

    Returns:
        net: output of the convolution operations
        pool: output of the max pooling operation (None if `pool` is False)
    """
    net = input_
    with tf.variable_scope("layer_{}".format(name)):
        for i, F in enumerate(n_filters):
            net = tf_utils.complete_conv2d(net, F, (3, 3), padding="VALID", activation=activation,
                                           bias_init_value=-0.01, weight_decay=weight_decay, summary=SUMMARY)

        if not pool:
            return net, None

        pool = tf.layers.max_pooling2d(net, (2, 2), strides=(2, 2), name="pool_{}".format(name))
        return net, pool
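# Usage sketch for stacking conv_conv_pool blocks into an encoder (the input
# placeholder and filter counts are illustrative assumptions):
#
#   images = tf.placeholder(tf.float32, [None, 128, 128, 3])
#   net1, pool1 = conv_conv_pool(images, [32, 32], name="1", weight_decay=1e-4)
#   net2, pool2 = conv_conv_pool(pool1, [64, 64], name="2", weight_decay=1e-4)
#   bottom, _ = conv_conv_pool(pool2, [128, 128], name="3", pool=False)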
def polygon_regressor(x_image, input_res, input_channels, encoding_length, output_vertex_count, weight_decay):
    """Builds the graph for a deep net for encoding and decoding polygons.

    Args:
        x_image: input tensor of shape (N_examples, input_res, input_res, input_channels)
        input_res: image resolution
        input_channels: image number of channels
        encoding_length: number of neurons used in the bottleneck to encode the input polygon
        output_vertex_count: number of vertices of the output polygon
        weight_decay: Weight decay coefficient

    Returns:
        y_coords: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
        keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    # First convolutional layer - maps the input channels to 16 feature maps.
    with tf.name_scope('conv1'):
        conv1 = tf_utils.complete_conv2d(x_image, 3, input_channels, 16, weight_decay)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope('pool1'):
        h_pool1 = tf_utils.max_pool_2x2(conv1)

    # Second convolutional layer -- maps 16 feature maps to 32.
    with tf.name_scope('conv2'):
        conv2 = tf_utils.complete_conv2d(h_pool1, 3, 16, 32, weight_decay)

    # Second pooling layer.
    with tf.name_scope('pool2'):
        h_pool2 = tf_utils.max_pool_2x2(conv2)

    # Third convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope('conv3'):
        conv3 = tf_utils.complete_conv2d(h_pool2, 3, 32, 64, weight_decay)

    # Third pooling layer.
    with tf.name_scope('pool3'):
        h_pool3 = tf_utils.max_pool_2x2(conv3)

    reduction_factor = 8  # Adjust according to previous layers
    current_data_dimension = int(input_res / reduction_factor) * int(input_res / reduction_factor) * 64

    with tf.name_scope('flatten'):
        h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

    # Fully connected layer 1 -- after 3 rounds of downsampling, the input_res x input_res image
    # is down to (input_res/8) x (input_res/8) x 64 feature maps -- map this to 1024 features.
    with tf.name_scope('fc1'):
        fc1 = tf_utils.complete_fc(h_pool3_flat, current_data_dimension, 1024, weight_decay, tf.nn.relu)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        # tf.summary.scalar('dropout_keep_probability', keep_prob)
        fc1_drop = tf.nn.dropout(fc1, keep_prob)

    # Map the 1024 features to encoding_length features
    with tf.name_scope('fc2'):
        fc2 = tf_utils.complete_fc(fc1_drop, 1024, encoding_length, weight_decay, tf.nn.relu)

    # --- Decoder --- #

    # Map the encoding_length features to 512 features
    with tf.name_scope('fc3'):
        fc3 = tf_utils.complete_fc(fc2, encoding_length, 512, weight_decay, tf.nn.relu)

    # Map the 512 features to the output_vertex_count * 2 output coordinates
    with tf.name_scope('fc4'):
        y_flat = tf_utils.complete_fc(fc3, 512, output_vertex_count * 2, weight_decay, tf.nn.sigmoid)

    with tf.name_scope('reshape_output'):
        y_coords = tf.reshape(y_flat, [-1, output_vertex_count, 2])

    return y_coords, keep_prob
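# Usage sketch for `polygon_regressor` (resolution and hyperparameters are
# illustrative assumptions; input_res must be divisible by the reduction
# factor of 8):
#
#   x = tf.placeholder(tf.float32, [None, 64, 64, 1])
#   y_coords, keep_prob = polygon_regressor(x, input_res=64, input_channels=1,
#                                           encoding_length=256,
#                                           output_vertex_count=4,
#                                           weight_decay=1e-4)
#   # y_coords: (None, 4, 2) vertex coordinates in [0, 1] (sigmoid output).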
def polygon_encoder_decoder(x_image, input_res, encoding_length, output_vertex_count, weight_decay=None):
    """Builds the graph for a deep net for encoding and decoding polygons.

    Args:
        x_image: input tensor of shape (N_examples, input_res, input_res, 1)
        input_res: image resolution
        encoding_length: number of neurons used in the bottleneck to encode the input polygon
        output_vertex_count: number of vertices of the output polygon
        weight_decay: Weight decay coefficient

    Returns:
        y_coords: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
        keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    with tf.name_scope('Features'):
        # First convolutional layer - maps one grayscale image to 8 feature maps.
        with tf.name_scope('conv1'):
            conv1 = tf_utils.complete_conv2d(x_image, 5, 1, 8, weight_decay)

        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            h_pool1 = tf_utils.max_pool_2x2(conv1)

        # Second convolutional layer -- maps 8 feature maps to 16.
        with tf.name_scope('conv2'):
            conv2 = tf_utils.complete_conv2d(h_pool1, 5, 8, 16, weight_decay)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = tf_utils.max_pool_2x2(conv2)

        # Third convolutional layer -- maps 16 feature maps to 32.
        with tf.name_scope('conv3'):
            conv3 = tf_utils.complete_conv2d(h_pool2, 5, 16, 32, weight_decay)

        # Third pooling layer.
        with tf.name_scope('pool3'):
            h_pool3 = tf_utils.max_pool_2x2(conv3)

    current_shape = h_pool3.shape
    current_data_dimension = int(current_shape[1] * current_shape[2] * current_shape[3])

    with tf.name_scope('Encoder'):
        with tf.name_scope('flatten'):
            h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

        # Dropout - controls the complexity of the model, prevents co-adaptation of
        # features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            # tf.summary.scalar('dropout_keep_probability', keep_prob)
            h_pool3_flat_drop = tf.nn.dropout(h_pool3_flat, keep_prob)

        with tf.name_scope('fc1'):
            fc1 = tf_utils.complete_fc(h_pool3_flat_drop, current_data_dimension, encoding_length, weight_decay,
                                       tf.nn.relu)

    y_coords = decode(fc1, encoding_length, output_vertex_count, weight_decay, scope_name="Decode")

    return y_coords, keep_prob
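# Usage sketch for `polygon_encoder_decoder` (resolution and hyperparameters
# are illustrative assumptions; relies on the `decode` function referenced
# above, which is defined elsewhere in this module):
#
#   x = tf.placeholder(tf.float32, [None, 64, 64, 1])
#   y_coords, keep_prob = polygon_encoder_decoder(x, input_res=64,
#                                                 encoding_length=128,
#                                                 output_vertex_count=4,
#                                                 weight_decay=1e-4)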