import tensorflow as tf

# `cfg`, `conv2d_fixed_padding`, `batch_norm_act_fun`, `log_act` and a
# module-level `log_act_helper` are provided elsewhere in this repository.


def building_layer(inputs, filters, is_training, projection_shortcut, strides,
                   data_format, relu_leakiness, reuse):
    """Standard building block for residual networks with BN before convolutions.

    Args:
        inputs: A tensor of size [batch, channels, height_in, width_in] or
            [batch, height_in, width_in, channels] depending on data_format.
        filters: The number of filters for the convolutions.
        is_training: A Boolean for whether the model is in training or
            inference mode. Needed for batch normalization.
        projection_shortcut: The function to use for projection shortcuts
            (typically a 1x1 convolution when downsampling the input).
        strides: The block's stride. If greater than 1, this block will
            ultimately downsample the input.
        data_format: The input format ("channels_last" or "channels_first").
        relu_leakiness: Slope of the leaky ReLU applied after batch norm.
        reuse: Whether to reuse the variables of this block.

    Returns:
        The output tensor of the block.
    """
    with tf.variable_scope("pre_activation_and_shortcuts"):
        shortcut = inputs
        inputs = batch_norm_act_fun(inputs, is_training, data_format,
                                    relu_leakiness, reuse=reuse)

        # The projection shortcut should come after the first batch norm and
        # ReLU since it performs a 1x1 convolution.
        if projection_shortcut is not None:
            shortcut = projection_shortcut(inputs)

    with tf.variable_scope("conv_one"):
        inputs = conv2d_fixed_padding(inputs=inputs, filters=filters,
                                      kernel_size=3, strides=strides,
                                      data_format=data_format, reuse=reuse)

    with tf.variable_scope("conv_two"):
        inputs = batch_norm_act_fun(inputs, is_training, data_format,
                                    relu_leakiness, reuse=reuse)
        inputs = conv2d_fixed_padding(inputs=inputs, filters=filters,
                                      kernel_size=3, strides=1,
                                      data_format=data_format, reuse=reuse)

    return inputs + shortcut
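# batch_norm_act_fun is defined elsewhere in this repository. A minimal
# sketch of what it computes, assuming the usual TF 1.x batch-norm plus
# leaky-ReLU pairing; the name `apply_act` for the final positional flag
# (passed as False from conv_act below) is an assumption:
def _batch_norm_act_fun_sketch(inputs, is_training, data_format,
                               relu_leakiness, reuse=False, apply_act=True):
    # Normalize over the channel axis, which depends on the data format.
    net = tf.layers.batch_normalization(
        inputs, axis=1 if data_format == "channels_first" else 3,
        training=is_training, reuse=reuse, fused=True)
    if apply_act:
        # Leaky ReLU with the configured negative slope.
        net = tf.nn.leaky_relu(net, alpha=relu_leakiness)
    return net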
def conv_act(inputs, filters, kernel_size=1, strides=1, leakiness=.1,
             data_format="channels_last", reuse=False, is_training=True,
             name="", scope=""):
    """Apply the repository-wide conv + batch norm + activation stack."""
    net = conv2d_fixed_padding(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides, data_format=data_format, reuse=reuse,
        name=scope + name,
        # The logarithmic activation is only applied to the very first
        # convolution of the network (while cfg.counter == 0).
        activation=log_act_helper
        if cfg.use_log_act and cfg.counter == 0 else None)
    net = batch_norm_act_fun(net, is_training, data_format, leakiness, reuse,
                             False)
    if cfg.counter == 0:
        cfg.counter += 1
    return net
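# conv_act is built to be driven by tf.contrib.layers.repeat (see vgg_16
# below): repeat calls the layer once per repetition and, because conv_act
# accepts a `scope` argument, passes a distinct scope suffix on every call.
# A hedged sketch of a single VGG-style block (shape and names here are
# assumptions):
def _conv_act_repeat_sketch(images):
    # Two stacked 3x3/64 conv + BN + leaky-ReLU layers, as in the first
    # block of vgg_16 below.
    return tf.contrib.layers.repeat(images, 2, conv_act, 64, 3,
                                    reuse=False, name="conv1",
                                    data_format="channels_last")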
def projection_shortcut(inputs):
    """1x1 convolution that matches the shortcut's depth and resolution.

    Note: filters_out, strides, data_format and reuse are bound in the
    enclosing scope.
    """
    return conv2d_fixed_padding(inputs=inputs, filters=filters_out,
                                kernel_size=1, strides=strides,
                                data_format=data_format, reuse=reuse)
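# Because projection_shortcut closes over filters_out, strides, data_format
# and reuse, it lives inside the surrounding block-layer builder. A minimal
# sketch of such a builder, assuming the standard pre-activation ResNet
# stacking pattern (only the first block of a stage downsamples and
# projects); the name _block_layer_sketch is hypothetical:
def _block_layer_sketch(inputs, filters, layer_fn, block_layer, strides,
                        is_training, name, data_format, relu_leakiness,
                        reuse):
    # building_layer keeps the channel count; a bottleneck variant would
    # use 4 * filters here.
    filters_out = filters

    def projection_shortcut(inputs):
        return conv2d_fixed_padding(inputs=inputs, filters=filters_out,
                                    kernel_size=1, strides=strides,
                                    data_format=data_format, reuse=reuse)

    # First block: projection shortcut and (possibly strided) downsampling.
    inputs = layer_fn(inputs, filters, is_training, projection_shortcut,
                      strides, data_format, relu_leakiness, reuse)
    # Remaining blocks: identity shortcut, stride 1.
    for _ in range(1, block_layer):
        inputs = layer_fn(inputs, filters, is_training, None, 1, data_format,
                          relu_leakiness, reuse)
    return tf.identity(inputs, name)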
def vgg_16(inputs, num_classes=11, is_training=True, dropout_keep_prob=0.5,
           spatial_squeeze=True, scope="vgg_16", fc_conv_padding="VALID",
           global_pool=True, data_format=None, reuse=False):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes. If 0 or None, the logits
            layer is omitted and the input features to the logits layer are
            returned instead.
        is_training: whether or not the model is being trained.
        dropout_keep_prob: the probability that activations are kept in the
            dropout layers during training.
        spatial_squeeze: whether or not to squeeze the spatial dimensions of
            the outputs. Useful to remove unnecessary dimensions for
            classification.
        scope: Optional scope for the variables.
        fc_conv_padding: the type of padding to use for the fully connected
            layer that is implemented as a convolutional layer. Use "SAME"
            padding if you are applying the network in a fully convolutional
            manner and want to get a prediction map downsampled by a factor
            of 32 as an output. Otherwise, the output prediction map will be
            (input / 32) - 6 in case of "VALID" padding.
        global_pool: Optional boolean flag. If True, the input to the
            classification layer is avgpooled to size 1x1, for any input
            size. (This is not part of the original VGG architecture.)
        data_format: "channels_last", "channels_first" or None.
        reuse: whether or not the network and its variables should be reused.

    Returns:
        net: the output of the logits layer (if num_classes is a non-zero
            integer), or the input to the logits layer (if num_classes is 0
            or None).
    """
    if data_format == "channels_first":
        # Convert from channels_last (NHWC) to channels_first (NCHW).
        # This provides a large performance boost on GPU.
        inp_shape = inputs.get_shape()
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        print("Transpose the inputs to channels_first, from: {} to: {}".format(
            inp_shape, inputs.get_shape()))
        print("_____________")
        dat_form_old = "NCHW"
        pool_ = [1, 1, 2, 2]
        global_pool_idx = [2, 3]
    else:
        dat_form_old = "NHWC"
        pool_ = [1, 2, 2, 1]
        global_pool_idx = [1, 2]

    cfg.counter = 0
    with tf.variable_scope(scope, "vgg_16", [inputs]):
print("Input Shape: {}".format(inputs.get_shape())) net = tf.contrib.layers.repeat(inputs, 2, conv_act, 64, 3, reuse=reuse, name="conv1", data_format=data_format) print("First conv block: {}".format(net.get_shape())) net = tf.nn.max_pool(net, pool_, pool_, "VALID", name="pool1", data_format=dat_form_old) print("After Max-Pooling: {}".format(net.get_shape())) net = tf.contrib.layers.repeat(net, 2, conv_act, 128, 3, reuse=reuse, name="conv2", data_format=data_format) print("Second conv block: {}".format(net.get_shape())) net = tf.nn.max_pool(net, pool_, pool_, "VALID", name="pool2", data_format=dat_form_old) print("Max-Pooling: {}".format(net.get_shape())) net = tf.contrib.layers.repeat(net, 3, conv_act, 256, 3, reuse=reuse, name="conv3", data_format=data_format) print("Third conv block: {}".format(net.get_shape())) net = tf.nn.max_pool(net, pool_, pool_, "VALID", name="pool3", data_format=dat_form_old) print("Max-Pooling: {}".format(net.get_shape())) net = tf.contrib.layers.repeat(net, 3, conv_act, 512, 3, reuse=reuse, name="conv4", data_format=data_format) print("Fourth conv block: {}".format(net.get_shape())) net = tf.nn.max_pool(net, pool_, pool_, "VALID", name="pool4", data_format=dat_form_old) print("Max-Pooling: {}".format(net.get_shape())) net = tf.contrib.layers.repeat(net, 3, conv_act, 512, 3, reuse=reuse, name="conv5", data_format=data_format) print("Fifth conv block: {}".format(net.get_shape())) net = tf.nn.max_pool(net, pool_, pool_, "VALID", name="pool5", data_format=dat_form_old) print("Max-Pooling: {}".format(net.get_shape())) print("Use conv2d instead of fully_connected layers.") net = conv_act(net, 4096, 7, name="fc6", reuse=reuse) net = tf.nn.dropout(net, dropout_keep_prob, name="dropout6") net = conv_act(net, 4096, 1, name="fc7", reuse=reuse) print("Last conv block: {}".format(net.get_shape())) if global_pool: net = tf.reduce_mean(net, global_pool_idx, keepdims=True, name="global_pool") if num_classes: net = tf.nn.dropout(net, dropout_keep_prob, name="dropout7") net = conv2d_fixed_padding(net, num_classes, 1, 1, data_format, activation=None, name="fc8", reuse=reuse) if spatial_squeeze and num_classes is not None: net = tf.squeeze(net, global_pool_idx, name="fc8/squeezed") net.default_image_size = 224 return net
def model(inputs, is_training, reuse):
    """Constructs the airynet model given the inputs.

    Note: data_format, layer_fn, layers, block_layer_fn, num_classes and
    relu_leakiness are bound in the enclosing scope.
    """
    with tf.variable_scope("nn_in"):
        inp_shape = inputs.get_shape()
        print("Original Image dimensions: {}".format(inp_shape))
        if data_format == "channels_first":
            # Convert from channels_last (NHWC) to channels_first (NCHW).
            # This provides a large performance boost on GPU.
            inputs = tf.transpose(inputs, [0, 3, 1, 2])
            print("Transpose the inputs to channels_first, from: {} to: {}"
                  .format(inp_shape, inputs.get_shape()))
            print("_____________")

        def log_act_helper(x):
            # Apply the logarithmic activation to the positive and the
            # negative part of the input separately.
            return tf.where(tf.greater(x, 0), log_act(x), log_act(x, False))

        # This is the log layer.
        inputs = conv2d_fixed_padding(
            inputs=inputs, filters=64, kernel_size=7, strides=2,
            data_format=data_format, reuse=reuse,
            activation=log_act_helper if cfg.use_log_act else None)
        inputs = tf.identity(inputs, "initial_conv")
        print("After the first convolution: {}".format(inputs.get_shape()))

        inputs = tf.layers.max_pooling2d(inputs=inputs, pool_size=3,
                                         strides=2, padding="SAME",
                                         data_format=data_format)
        inputs = tf.identity(inputs, "initial_max_pool")
        print("After Max Pooling with pool size three: {}".format(
            inputs.get_shape()))

    with tf.variable_scope("first_block"):
        inputs = block_layer_fn(inputs=inputs, filters=64, layer_fn=layer_fn,
                                block_layer=layers[0], strides=1,
                                is_training=is_training,
                                name="first_block_layer_fn",
                                data_format=data_format,
                                relu_leakiness=relu_leakiness, reuse=reuse)
        print("After the first block: {}".format(inputs.get_shape()))

    with tf.variable_scope("second_block"):
        inputs = block_layer_fn(inputs=inputs, filters=128, layer_fn=layer_fn,
                                block_layer=layers[1], strides=2,
                                is_training=is_training,
                                name="second_block_layer_fn",
                                data_format=data_format,
                                relu_leakiness=relu_leakiness, reuse=reuse)
        print("After the second block: {}".format(inputs.get_shape()))

    with tf.variable_scope("third_block"):
        inputs = block_layer_fn(inputs=inputs, filters=256, layer_fn=layer_fn,
                                block_layer=layers[2], strides=2,
                                is_training=is_training,
                                name="third_block_layer_fn",
                                data_format=data_format,
                                relu_leakiness=relu_leakiness, reuse=reuse)
        print("After the third block: {}".format(inputs.get_shape()))

    with tf.variable_scope("fourth_block"):
        inputs = block_layer_fn(inputs=inputs, filters=512, layer_fn=layer_fn,
                                block_layer=layers[3], strides=2,
                                is_training=is_training,
                                name="last_block_before_fc",
                                data_format=data_format,
                                relu_leakiness=relu_leakiness, reuse=reuse)
        print("After the fourth block: {}".format(inputs.get_shape()))

    with tf.variable_scope("nn_out"):
        inputs = batch_norm_act_fun(inputs, is_training, data_format,
                                    relu_leakiness, reuse=reuse)
        inputs = tf.layers.average_pooling2d(inputs=inputs, pool_size=7,
                                             strides=1, padding="VALID",
                                             data_format=data_format)
        inputs = tf.identity(inputs, "final_avg_pool")
        print("After Average Pooling with pool size seven: {}".format(
            inputs.get_shape()))

        inputs = tf.reshape(
            inputs, [-1, 512 if layer_fn is building_layer else 2048])
        print("After reshaping, to serve the fc: {}".format(
            inputs.get_shape()))

        inputs = tf.layers.dense(
            inputs=inputs, units=num_classes,
            kernel_regularizer=tf.contrib.layers.l1_l2_regularizer(
                scale_l1=cfg.l1_regularization_scale,
                scale_l2=cfg.l2_regularization_scale))
        inputs = tf.identity(inputs, "final_dense")
        print("After final fc layer: {}".format(inputs.get_shape()))

    return inputs
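# model closes over data_format, layer_fn, layers, block_layer_fn,
# num_classes and relu_leakiness, so in the full source it is defined inside
# a generator that binds those names before returning it. A minimal sketch
# of that closure pattern; the generator name and its defaults are
# assumptions, and _block_layer_sketch is the hypothetical stage builder
# sketched above:
def _airynet_generator_sketch(layers, num_classes,
                              data_format="channels_first",
                              relu_leakiness=0.1):
    layer_fn = building_layer
    block_layer_fn = _block_layer_sketch

    def model_fn(inputs, is_training, reuse):
        # In the real code this body is the `model` function above, which
        # resolves layers, layer_fn, block_layer_fn, num_classes,
        # data_format and relu_leakiness through this enclosing scope.
        ...

    return model_fn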