Example 1
def encode(input_tensor, encoding_length, weight_decay, scope_name="Encode"):
    """Encode `input_tensor` into a vector of `encoding_length` features.

    Two convolutional layers remap the input channels to 512 and then 128
    feature maps; the result is flattened, passed through dropout and
    projected to `encoding_length` features by a fully-connected layer.

    Returns:
        fc1: tensor of shape (N_examples, encoding_length)
        keep_prob: scalar placeholder for the dropout keep probability
    """

    input_tensor_shape = input_tensor.shape
    input_tensor_channel_count = int(input_tensor_shape[3])

    with tf.name_scope(scope_name):
        with tf.name_scope('conv1'):
            conv1 = tf_utils.complete_conv2d(input_tensor, 1,
                                             input_tensor_channel_count, 512,
                                             weight_decay)

        with tf.name_scope('conv2'):
            conv2 = tf_utils.complete_conv2d(conv1, 1, 512, 128, weight_decay)

        current_output_shape = conv2.shape
        current_data_dimension = int(current_output_shape[1] *
                                     current_output_shape[2] *
                                     current_output_shape[3])

        with tf.name_scope('flatten'):
            conv2_flat = tf.reshape(conv2, [-1, current_data_dimension])

        # Dropout - controls the complexity of the model, prevents co-adaptation of
        # features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            # tf.summary.scalar('dropout_keep_probability', keep_prob)
            inception_output_drop = tf.nn.dropout(conv2_flat, keep_prob)

        with tf.name_scope('fc1'):
            fc1 = tf_utils.complete_fc(inception_output_drop,
                                       current_data_dimension, encoding_length,
                                       weight_decay, tf.nn.relu)

    return fc1, keep_prob
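
A minimal usage sketch for `encode` (assuming TensorFlow 1.x and that the project's `tf_utils` helpers are importable; the input shape is illustrative):

import tensorflow as tf

# Hypothetical input: a batch of 8x8 feature maps with 256 channels.
features = tf.placeholder(tf.float32, [None, 8, 8, 256])
encoding, keep_prob = encode(features, encoding_length=128, weight_decay=1e-4)
# encoding: (None, 128); feed keep_prob=1.0 at inference, e.g. 0.5 during training.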
Example 2
def upsample_crop_concat(to_upsample, input_to_crop, size=(2, 2), weight_decay=None, name=None):
    """Upsample `to_upsample`, crop to match resolution of `input_to_crop` and concat the two.

    Args:
        input_A (4-D Tensor): (N, H, W, C)
        input_to_crop (4-D Tensor): (N, 2*H + padding, 2*W + padding, C2)
        size (tuple): (height_multiplier, width_multiplier) (default: (2, 2))
        name (str): name of the concat operation (default: None)

    Returns:
        output (4-D Tensor): (N, size[0]*H, size[1]*W, 2*C2)
    """
    H, W, _ = to_upsample.get_shape().as_list()[1:]
    _, _, target_C = input_to_crop.get_shape().as_list()[1:]

    H_multi, W_multi = size
    target_H = H * H_multi
    target_W = W * W_multi

    upsample = tf.image.resize_bilinear(to_upsample, (target_H, target_W), name="upsample_{}".format(name))
    upsample = tf_utils.complete_conv2d(upsample, target_C, (3, 3), padding="SAME", bias_init_value=-0.01,
                                        weight_decay=weight_decay,
                                        summary=SUMMARY)

    # TODO: initialize upsample with bilinear weights
    # upsample = tf.layers.conv2d_transpose(to_upsample, target_C, kernel_size=2, strides=1, padding="valid", name="deconv{}".format(name))

    crop = tf.image.resize_image_with_crop_or_pad(input_to_crop, target_H, target_W)

    return tf.concat([upsample, crop], axis=-1, name="concat_{}".format(name))
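
A sketch of one decoder step built with `upsample_crop_concat`, assuming the module-level SUMMARY flag and `tf_utils` exist as in the snippet; the shapes are illustrative:

import tensorflow as tf

# Coarse decoder features and the larger encoder skip map to merge with.
coarse = tf.placeholder(tf.float32, [None, 32, 32, 128])
skip = tf.placeholder(tf.float32, [None, 68, 68, 64])
merged = upsample_crop_concat(coarse, skip, size=(2, 2), name="dec1")
# merged: (None, 64, 64, 128) -- 64 channels from the upsampling conv plus 64 cropped ones.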
Example 3
def conv_conv_pool(input_, n_filters, name="", pool=True, activation=tf.nn.elu, weight_decay=None,
                   dropout_keep_prob=None):
    """{Conv -> BN -> RELU}x2 -> {Pool, optional}

    Args:
        input_ (4-D Tensor): (batch_size, H, W, C)
        n_filters (list): number of filters [int, int]
        training (1-D Tensor): Boolean Tensor
        name (str): name postfix
        pool (bool): If True, MaxPool2D
        activation: Activation function
        weight_decay: Weight decay rate

    Returns:
        net: output of the Convolution operations
        pool (optional): output of the max pooling operations
    """
    net = input_

    with tf.variable_scope("layer_{}".format(name)):
        for F in n_filters:
            net = tf_utils.complete_conv2d(net, F, (3, 3), padding="VALID", activation=activation,
                                           bias_init_value=-0.01,
                                           weight_decay=weight_decay,
                                           summary=SUMMARY)
        if not pool:
            return net, None

        pool = tf.layers.max_pooling2d(net, (2, 2), strides=(2, 2), name="pool_{}".format(name))
        return net, pool
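
A hypothetical encoder built by chaining `conv_conv_pool` blocks (U-Net style); the 572x572 input is only an example, and VALID padding shrinks each map by 2 pixels per convolution:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 572, 572, 3])
net1, pool1 = conv_conv_pool(images, [64, 64], name="1")   # net1: 568x568, pool1: 284x284
net2, pool2 = conv_conv_pool(pool1, [128, 128], name="2")  # net2: 280x280, pool2: 140x140
bottom, _ = conv_conv_pool(pool2, [256, 256], name="3", pool=False)  # bottom: 136x136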
Example 4
def polygon_regressor(x_image, input_res, input_channels, encoding_length,
                      output_vertex_count, weight_decay):
    """
    Builds the graph for a deep net for encoding and decoding polygons.

    Args:
      x_image: input tensor of shape (N_examples, input_res, input_res, input_channels)
      input_res: image resolution
      input_channels: image number of channels
      encoding_length: number of neurons used in the bottleneck to encode the input polygon
      output_vertex_count: number of vertices in the output polygon
      weight_decay: Weight decay coefficient

    Returns:
      y: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
      keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    # First convolutional layer - maps the input channels to 16 feature maps.
    with tf.name_scope('conv1'):
        conv1 = tf_utils.complete_conv2d(x_image, 3, input_channels, 16,
                                         weight_decay)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope('pool1'):
        h_pool1 = tf_utils.max_pool_2x2(conv1)

    # Second convolutional layer -- maps 16 feature maps to 32.
    with tf.name_scope('conv2'):
        conv2 = tf_utils.complete_conv2d(h_pool1, 3, 16, 32, weight_decay)

    # Second pooling layer.
    with tf.name_scope('pool2'):
        h_pool2 = tf_utils.max_pool_2x2(conv2)

    # Third convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope('conv3'):
        conv3 = tf_utils.complete_conv2d(h_pool2, 3, 32, 64, weight_decay)

    # Third pooling layer.
    with tf.name_scope('pool3'):
        h_pool3 = tf_utils.max_pool_2x2(conv3)

    reduction_factor = 8  # Adjust according to previous layers
    current_data_dimension = int(input_res / reduction_factor) * int(
        input_res / reduction_factor) * 64

    with tf.name_scope('flatten'):
        h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

    # Fully connected layer 1 -- after 3 rounds of downsampling, a 64x64 input
    # is down to 8x8x64 feature maps -- map this to 1024 features.
    with tf.name_scope('fc1'):
        fc1 = tf_utils.complete_fc(h_pool3_flat, current_data_dimension, 1024,
                                   weight_decay, tf.nn.relu)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        # tf.summary.scalar('dropout_keep_probability', keep_prob)
        fc1_drop = tf.nn.dropout(fc1, keep_prob)

    # Map the 1024 features to encoding_length features
    with tf.name_scope('fc2'):
        fc2 = tf_utils.complete_fc(fc1_drop, 1024, encoding_length,
                                   weight_decay, tf.nn.relu)

    # --- Decoder --- #

    # Map the encoding_length features to 512 features
    with tf.name_scope('fc3'):
        fc3 = tf_utils.complete_fc(fc2, encoding_length, 512, weight_decay,
                                   tf.nn.relu)

    # Map the 512 features to the output_vertex_count * 2 output coordinates
    with tf.name_scope('fc4'):
        y_flat = tf_utils.complete_fc(fc3, 512, output_vertex_count * 2,
                                      weight_decay, tf.nn.sigmoid)

    with tf.name_scope('reshape_output'):
        y_coords = tf.reshape(y_flat, [-1, output_vertex_count, 2])

    return y_coords, keep_prob
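
A minimal end-to-end run of `polygon_regressor` (assuming TensorFlow 1.x; the 64x64 RGB input and the zero batch are placeholders for real data):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64, 64, 3])
y, keep_prob = polygon_regressor(x, input_res=64, input_channels=3,
                                 encoding_length=128, output_vertex_count=16,
                                 weight_decay=1e-4)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.zeros((4, 64, 64, 3), dtype=np.float32)
    coords = sess.run(y, feed_dict={x: batch, keep_prob: 1.0})
    print(coords.shape)  # (4, 16, 2) -- vertex coordinates in [0, 1] from the sigmoid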
Example 5
def polygon_encoder_decoder(x_image,
                            input_res,
                            encoding_length,
                            output_vertex_count,
                            weight_decay=None):
    """
    Builds the graph for a deep net for encoding and decoding polygons.

    Args:
      x_image: input tensor with dimensions (N_examples, input_res, input_res, 1)
      input_res: input image resolution
      encoding_length: number of neurons used in the bottleneck to encode the input polygon
      output_vertex_count: number of vertices in the output polygon
      weight_decay: Weight decay coefficient

    Returns:
      y: tensor of shape (N_examples, output_vertex_count, 2), with vertex coordinates
      keep_prob: scalar placeholder for the probability of dropout.
    """
    # with tf.name_scope('reshape'):
    #     x_image = tf.reshape(x, [-1, input_res, input_res, 1])

    # First convolutional layer - maps one grayscale image to 8 feature maps.
    with tf.name_scope('Features'):
        with tf.name_scope('conv1'):
            conv1 = tf_utils.complete_conv2d(x_image, 5, 1, 8, weight_decay)

        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            h_pool1 = tf_utils.max_pool_2x2(conv1)

        # Second convolutional layer -- maps 8 feature maps to 16.
        with tf.name_scope('conv2'):
            conv2 = tf_utils.complete_conv2d(h_pool1, 5, 8, 16, weight_decay)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = tf_utils.max_pool_2x2(conv2)

        # Third convolutional layer -- maps 16 feature maps to 32.
        with tf.name_scope('conv3'):
            conv3 = tf_utils.complete_conv2d(h_pool2, 5, 16, 32, weight_decay)

        # Third pooling layer.
        with tf.name_scope('pool3'):
            h_pool3 = tf_utils.max_pool_2x2(conv3)

    current_shape = h_pool3.shape
    current_data_dimension = int(current_shape[1] * current_shape[2] *
                                 current_shape[3])

    with tf.name_scope('Encoder'):
        with tf.name_scope('flatten'):
            h_pool3_flat = tf.reshape(h_pool3, [-1, current_data_dimension])

        # Dropout - controls the complexity of the model, prevents co-adaptation of
        # features.
        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            # tf.summary.scalar('dropout_keep_probability', keep_prob)
            h_pool3_flat_drop = tf.nn.dropout(h_pool3_flat, keep_prob)

        with tf.name_scope('fc1'):
            fc1 = tf_utils.complete_fc(h_pool3_flat_drop,
                                       current_data_dimension, encoding_length,
                                       weight_decay, tf.nn.relu)

    y_coords = decode(fc1,
                      encoding_length,
                      output_vertex_count,
                      weight_decay,
                      scope_name="Decode")

    return y_coords, keep_prob
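
A similar hypothetical run for `polygon_encoder_decoder`; it additionally assumes that the `decode` function called at the end is defined elsewhere in the same module, mirroring `encode`:

import numpy as np
import tensorflow as tf

polygons = tf.placeholder(tf.float32, [None, 64, 64, 1])
y, keep_prob = polygon_encoder_decoder(polygons, input_res=64,
                                       encoding_length=32,
                                       output_vertex_count=8)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.zeros((2, 64, 64, 1), dtype=np.float32)
    vertices = sess.run(y, feed_dict={polygons: batch, keep_prob: 1.0})
    print(vertices.shape)  # (2, 8, 2)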