Example #1
def building_layer(inputs, filters, is_training, projection_shortcut, strides,
                   data_format, relu_leakiness, reuse):
    """
    Standard building block for residual networks with BN before convolutions.

    Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the convolutions.
    is_training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
    projection_shortcut: The function to use for projection shortcuts
      (typically a 1x1 convolution when downsampling the input).
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ("channels_last" or "channels_first").
    relu_leakiness: The slope of the leaky ReLU for negative inputs.
    reuse: A Boolean for whether to reuse the variables in the scope.

    Returns:
    The output tensor of the block.
    """
    with tf.variable_scope("pre_activation_and_shortcuts"):
        shortcut = inputs
        inputs = batch_norm_act_fun(inputs,
                                    is_training,
                                    data_format,
                                    relu_leakiness,
                                    reuse=reuse)

        # The projection shortcut should come after the first
        # batch norm and ReLU since it performs a 1x1 convolution.
        if projection_shortcut is not None:
            shortcut = projection_shortcut(inputs)

    with tf.variable_scope("conv_one"):
        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=3,
                                      strides=strides,
                                      data_format=data_format,
                                      reuse=reuse)

    with tf.variable_scope("conv_two"):
        inputs = batch_norm_act_fun(inputs,
                                    is_training,
                                    data_format,
                                    relu_leakiness,
                                    reuse=reuse)
        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=3,
                                      strides=1,
                                      data_format=data_format,
                                      reuse=reuse)

    return inputs + shortcut
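The block depends on two helpers defined elsewhere in the repository. A minimal sketch of what they plausibly look like, modeled on the TensorFlow official ResNet implementation; the exact signatures, the reuse plumbing, and the activation toggle are assumptions, not the repository's actual code:

import tensorflow as tf

def fixed_padding(inputs, kernel_size, data_format):
    # Pad the input independently of its size so that strided
    # convolutions behave deterministically. Assumed helper.
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if data_format == "channels_first":
        return tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                           [pad_beg, pad_end], [0, 0]])

def conv2d_fixed_padding(inputs, filters, kernel_size, strides,
                         data_format, reuse=False, name=None,
                         activation=None):
    # Strided convolutions get explicit padding so the output size
    # depends only on the input size, not on kernel alignment.
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides,
        padding="SAME" if strides == 1 else "VALID",
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format, activation=activation,
        name=name, reuse=reuse)

def batch_norm_act_fun(inputs, is_training, data_format,
                       relu_leakiness, reuse=False, act=True):
    # Batch normalization followed by a (leaky) ReLU, matching the
    # pre-activation ordering of building_layer above. The sixth
    # argument toggling the activation is an assumption based on the
    # trailing False in the conv_act call below.
    inputs = tf.layers.batch_normalization(
        inputs,
        axis=1 if data_format == "channels_first" else 3,
        training=is_training, fused=True, reuse=reuse)
    if act:
        inputs = tf.nn.leaky_relu(inputs, alpha=relu_leakiness)
    return inputs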
Example #2
def conv_act(inputs,
             filters,
             kernel_size=1,
             strides=1,
             leakiness=.1,
             data_format="channels_last",
             reuse=False,
             is_training=True,
             name="",
             scope=""):
    """
    Use global custom conv+activation function
    """
    net = conv2d_fixed_padding(inputs=inputs,
                               filters=filters,
                               kernel_size=kernel_size,
                               strides=strides,
                               data_format=data_format,
                               reuse=reuse,
                               name=scope + name,
                               activation=log_act_helper if cfg.use_log_act
                               and cfg.counter == 0 else None)
    net = batch_norm_act_fun(net, is_training, data_format, leakiness, reuse,
                             False)
    if cfg.counter == 0:
        cfg.counter += 1
    return net
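Note that cfg.counter is reset to 0 in vgg_16 below and incremented after the first call, so the logarithmic activation touches at most the network's first convolution. A plausible sketch of the signed logarithmic activation being referenced; this is purely an assumption, the real log_act lives elsewhere in the repository:

def log_act(x, positive=True):
    # Hypothetical signed log activation: compresses large magnitudes
    # while preserving the sign. log_act_helper in Example #5 picks the
    # branch with tf.where(tf.greater(x, 0), ...).
    if positive:
        return tf.log(tf.nn.relu(x) + 1.0)
    return -tf.log(tf.nn.relu(-x) + 1.0)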
Example #3
def projection_shortcut(inputs):
    return conv2d_fixed_padding(inputs=inputs,
                                filters=filters_out,
                                kernel_size=1,
                                strides=strides,
                                data_format=data_format,
                                reuse=reuse)
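This closure is created inside a block-layer wrapper and handed only to the first block of each stage, so the shortcut is downsampled and widened in step with the main path. A sketch of such a wrapper, following the official ResNet block_layer pattern; the name block_layer_fn and its extra relu_leakiness/reuse parameters are inferred from the calls in Example #5:

def block_layer_fn(inputs, filters, layer_fn, block_layer, strides,
                   is_training, name, data_format, relu_leakiness, reuse):
    # Bottleneck blocks widen the output 4x; plain building blocks do not.
    filters_out = filters if layer_fn is building_layer else 4 * filters

    def projection_shortcut(inputs):
        return conv2d_fixed_padding(inputs=inputs,
                                    filters=filters_out,
                                    kernel_size=1,
                                    strides=strides,
                                    data_format=data_format,
                                    reuse=reuse)

    # Only the first block per stage downsamples and projects the shortcut.
    inputs = layer_fn(inputs, filters, is_training, projection_shortcut,
                      strides, data_format, relu_leakiness, reuse)
    for _ in range(1, block_layer):
        inputs = layer_fn(inputs, filters, is_training, None, 1,
                          data_format, relu_leakiness, reuse)
    return tf.identity(inputs, name)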
Example #4
def vgg_16(inputs,
           num_classes=11,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope="vgg_16",
           fc_conv_padding="VALID",
           global_pool=True,
           data_format=None,
           reuse=False):
    """
    Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to
    conv2d layers. To use in classification mode, resize input to 224x224.

    Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer are returned instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use "SAME" padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      "VALID" padding.
    global_pool: Optional boolean flag. If True, the input to the
      classification layer is avgpooled to size 1x1, for any input size.
      (This is not part of the original VGG architecture.)
    data_format: "channels_last", "channels_first", or None. With
      "channels_first" the inputs are transposed to NCHW, which provides
      a large performance boost on GPU.
    reuse: whether or not to reuse the variables in the scope.

    Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the input to the logits layer (if num_classes is 0 or None).
    """
    if data_format == "channels_first":
        # Convert from channels_last (NHWC) to channels_first (NCHW).
        # This provides a large performance boost on GPU.
        inp_shape = inputs.get_shape()
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        print("Transpose the inputs to channels_first, from: {} to: {}".format(
            inp_shape, inputs.get_shape()))
        print("_____________")
        dat_form_old = "NCHW"
        pool_ = [1, 1, 2, 2]
        global_pool_idx = [2, 3]
    else:
        dat_form_old = "NHWC"
        pool_ = [1, 2, 2, 1]
        global_pool_idx = [1, 2]
    cfg.counter = 0

    with tf.variable_scope(scope, "vgg_16", [inputs]):
        print("Input Shape: {}".format(inputs.get_shape()))
        net = tf.contrib.layers.repeat(inputs,
                                       2,
                                       conv_act,
                                       64,
                                       3,
                                       reuse=reuse,
                                       name="conv1",
                                       data_format=data_format)
        print("First conv block: {}".format(net.get_shape()))
        net = tf.nn.max_pool(net,
                             pool_,
                             pool_,
                             "VALID",
                             name="pool1",
                             data_format=dat_form_old)
        print("After Max-Pooling: {}".format(net.get_shape()))
        net = tf.contrib.layers.repeat(net,
                                       2,
                                       conv_act,
                                       128,
                                       3,
                                       reuse=reuse,
                                       name="conv2",
                                       data_format=data_format)
        print("Second conv block: {}".format(net.get_shape()))
        net = tf.nn.max_pool(net,
                             pool_,
                             pool_,
                             "VALID",
                             name="pool2",
                             data_format=dat_form_old)
        print("Max-Pooling: {}".format(net.get_shape()))
        net = tf.contrib.layers.repeat(net,
                                       3,
                                       conv_act,
                                       256,
                                       3,
                                       reuse=reuse,
                                       name="conv3",
                                       data_format=data_format)
        print("Third conv block: {}".format(net.get_shape()))
        net = tf.nn.max_pool(net,
                             pool_,
                             pool_,
                             "VALID",
                             name="pool3",
                             data_format=dat_form_old)
        print("Max-Pooling: {}".format(net.get_shape()))
        net = tf.contrib.layers.repeat(net,
                                       3,
                                       conv_act,
                                       512,
                                       3,
                                       reuse=reuse,
                                       name="conv4",
                                       data_format=data_format)
        print("Fourth conv block: {}".format(net.get_shape()))
        net = tf.nn.max_pool(net,
                             pool_,
                             pool_,
                             "VALID",
                             name="pool4",
                             data_format=dat_form_old)
        print("Max-Pooling: {}".format(net.get_shape()))
        net = tf.contrib.layers.repeat(net,
                                       3,
                                       conv_act,
                                       512,
                                       3,
                                       reuse=reuse,
                                       name="conv5",
                                       data_format=data_format)
        print("Fifth conv block: {}".format(net.get_shape()))
        net = tf.nn.max_pool(net,
                             pool_,
                             pool_,
                             "VALID",
                             name="pool5",
                             data_format=dat_form_old)
        print("Max-Pooling: {}".format(net.get_shape()))

        print("Use conv2d instead of fully_connected layers.")
        net = conv_act(net, 4096, 7, name="fc6", reuse=reuse,
                       data_format=data_format)
        net = tf.nn.dropout(net, dropout_keep_prob, name="dropout6")
        net = conv_act(net, 4096, 1, name="fc7", reuse=reuse,
                       data_format=data_format)
        print("Last conv block: {}".format(net.get_shape()))

        if global_pool:
            net = tf.reduce_mean(net,
                                 global_pool_idx,
                                 keepdims=True,
                                 name="global_pool")
        if num_classes:
            net = tf.nn.dropout(net, dropout_keep_prob, name="dropout7")
            net = conv2d_fixed_padding(net,
                                       num_classes,
                                       1,
                                       1,
                                       data_format,
                                       activation=None,
                                       name="fc8",
                                       reuse=reuse)
            if spatial_squeeze:
                net = tf.squeeze(net, global_pool_idx, name="fc8/squeezed")

        net.default_image_size = 224
        return net
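A minimal usage sketch for classification on 224x224 inputs; the placeholder setup is an assumption, and cfg must provide use_log_act and counter before the call:

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits = vgg_16(images,
                num_classes=11,
                is_training=True,
                data_format="channels_last",
                reuse=False)
# After global pooling and the spatial squeeze, logits has
# shape [batch_size, num_classes].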
Example #5
    def model(inputs, is_training, reuse):
        """
        Constructs the airynet model given the inputs.
        """
        with tf.variable_scope("nn_in"):
            inp_shape = inputs.get_shape()
            print("Original Image dimensions: {}".format(inp_shape))
            if data_format == "channels_first":
                # Convert from channels_last (NHWC) to channels_first (NCHW).
                # This provides a large performance boost on GPU.
                inputs = tf.transpose(inputs, [0, 3, 1, 2])
                print(
                    "Transpose the inputs to channels_first, from: {} to: {}".
                    format(inp_shape, inputs.get_shape()))
                print("_____________")

            def log_act_helper(x):
                return tf.where(tf.greater(x, 0), log_act(x),
                                log_act(x, False))

            # this is the log layer
            inputs = conv2d_fixed_padding(
                inputs=inputs,
                filters=64,
                kernel_size=7,
                strides=2,
                data_format=data_format,
                reuse=reuse,
                activation=log_act_helper if cfg.use_log_act else None)
            inputs = tf.identity(inputs, "initial_conv")
            print("After the first convolution: {}".format(inputs.get_shape()))
            inputs = tf.layers.max_pooling2d(inputs=inputs,
                                             pool_size=3,
                                             strides=2,
                                             padding="SAME",
                                             data_format=data_format)
            inputs = tf.identity(inputs, "initial_max_pool")
            print("After Max Pooling with pool size three: {}".format(
                inputs.get_shape()))

        with tf.variable_scope("first_block"):
            inputs = block_layer_fn(inputs=inputs,
                                    filters=64,
                                    layer_fn=layer_fn,
                                    block_layer=layers[0],
                                    strides=1,
                                    is_training=is_training,
                                    name="first_block_layer_fn",
                                    data_format=data_format,
                                    relu_leakiness=relu_leakiness,
                                    reuse=reuse)
            print("After the first block: {}".format(inputs.get_shape()))

        with tf.variable_scope("second_block"):
            inputs = block_layer_fn(inputs=inputs,
                                    filters=128,
                                    layer_fn=layer_fn,
                                    block_layer=layers[1],
                                    strides=2,
                                    is_training=is_training,
                                    name="second_block_layer_fn",
                                    data_format=data_format,
                                    relu_leakiness=relu_leakiness,
                                    reuse=reuse)
            print("After the second block: {}".format(inputs.get_shape()))

        with tf.variable_scope("third_block"):
            inputs = block_layer_fn(inputs=inputs,
                                    filters=256,
                                    layer_fn=layer_fn,
                                    block_layer=layers[2],
                                    strides=2,
                                    is_training=is_training,
                                    name="third_block_layer_fn",
                                    data_format=data_format,
                                    relu_leakiness=relu_leakiness,
                                    reuse=reuse)
            print("After the third block: {}".format(inputs.get_shape()))

        with tf.variable_scope("fourth_block"):
            inputs = block_layer_fn(inputs=inputs,
                                    filters=512,
                                    layer_fn=layer_fn,
                                    block_layer=layers[3],
                                    strides=2,
                                    is_training=is_training,
                                    name="last_block_before_fc",
                                    data_format=data_format,
                                    relu_leakiness=relu_leakiness,
                                    reuse=reuse)
            print("After the fourth block: {}".format(inputs.get_shape()))

        with tf.variable_scope("nn_out"):
            inputs = batch_norm_act_fun(inputs,
                                        is_training,
                                        data_format,
                                        relu_leakiness,
                                        reuse=reuse)
            inputs = tf.layers.average_pooling2d(inputs=inputs,
                                                 pool_size=7,
                                                 strides=1,
                                                 padding="VALID",
                                                 data_format=data_format)
            inputs = tf.identity(inputs, "final_avg_pool")
            print("After Max Pooling with pool size seven: {}".format(
                inputs.get_shape()))
            inputs = tf.reshape(
                inputs, [-1, 512 if layer_fn is building_layer else 2048])
            print("After reshaping, to serve the fc: {}".format(
                inputs.get_shape()))
            inputs = tf.layers.dense(
                inputs=inputs,
                units=num_classes,
                kernel_regularizer=tf.contrib.layers.l1_l2_regularizer(
                    scale_l1=cfg.l1_regularization_scale,
                    scale_l2=cfg.l2_regularization_scale))
            inputs = tf.identity(inputs, "final_dense")
            print("After final fc layer: {}".format(inputs.get_shape()))
        return inputs
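model is a closure: data_format, layer_fn, layers, relu_leakiness, num_classes, and cfg are captured from the enclosing builder. A hedged usage sketch, with a hypothetical builder name standing in for the repository's actual constructor:

# airynet_generator is a hypothetical name; the real constructor closes
# over the variables that model() reads above.
network = airynet_generator(layers=[2, 2, 2, 2],  # ResNet-18 layout
                            layer_fn=building_layer,
                            num_classes=10,
                            relu_leakiness=0.1,
                            data_format="channels_first")
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits = network(images, is_training=True, reuse=False)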