# Shared imports for the snippets below (added; the original snippets do not
# include their import lines, so this follows the standard TF 1.x contrib
# layout they appear to assume).
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib import framework
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import regularizers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.contrib.slim.nets import vgg
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope


def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not to squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs, 1, layers.conv2d, 64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3],
                                    scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = layers.conv2d(net, 4096, [7, 7], padding='VALID',
                                scope='fc6')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training,
                scope='dropout6')
            net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training,
                scope='dropout7')
            net = layers.conv2d(
                net,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
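# --------------------------------------------------------------------------
# Usage sketch (added, not from the original source): a minimal way to drive
# vgg_a above under TF 1.x. The 224x224 input size is the one the docstring
# asks for; the batch size and class count here are arbitrary.
def _demo_vgg_a():
    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, [None, 224, 224, 3])
        logits, end_points = vgg_a(images, num_classes=10, is_training=False)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            out = sess.run(logits,
                           {images: np.zeros((2, 224, 224, 3), np.float32)})
            print(out.shape)  # (2, 10) thanks to the spatial squeeze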
def get_slim_arch_bn(inputs, isTrainTensor, num_classes=1000, scope='vgg_16'):
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        filters = 64
        # Arg scope sets default parameters for a list of ops
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected,
             layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(
                inputs, 2, layers.conv2d, filters, [3, 3], scope='conv1',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_0 = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn1', decay=0.9)
            p_0 = layers_lib.max_pool2d(bn_0, [2, 2], scope='pool1')

            net = layers_lib.repeat(
                p_0, 2, layers.conv2d, filters, [3, 3], scope='conv2',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_1 = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn2', decay=0.9)
            res_1 = p_0 + bn_1  # residual add; both sides are 64 channels
            p_1 = layers_lib.max_pool2d(res_1, [2, 2], scope='pool2')

            net = layers_lib.repeat(
                p_1, 3, layers.conv2d, filters, [4, 4], scope='conv3',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_2 = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn3', decay=0.9)
            res_2 = p_1 + bn_2
            p_2 = layers_lib.max_pool2d(res_2, [2, 2], scope='pool3')

            net = layers_lib.repeat(
                p_2, 3, layers.conv2d, filters, [5, 5], scope='conv4',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_3 = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn4', decay=0.9)
            res_3 = p_2 + bn_3
            p_3 = layers_lib.max_pool2d(res_3, [2, 2], scope='pool4')

            last_conv = net = layers_lib.repeat(
                p_3, 3, layers.conv2d, filters, [5, 5], scope='conv5',
                weights_regularizer=slim.l2_regularizer(0.01))

            # Here we have 14x14 filters
            net = tf.reduce_mean(net, [1, 2])  # Global average pooling

            # Add a float32 mask with the same shape as the global average
            # pooling output; it defaults to ones, so feeding it is optional.
            mask = tf.placeholder_with_default(
                tf.ones_like(net), shape=net.shape, name='gap_mask')
            net = tf.multiply(net, mask)

            net = layers_lib.fully_connected(
                net, num_classes, activation_fn=None,
                biases_initializer=None, scope='softmax_logits')

            with tf.variable_scope("raw_CAM"):
                # Use sc.name so the lookup still works under a non-default
                # scope (the original hardcoded "vgg_16").
                w_tensor_name = sc.name + "/softmax_logits/weights:0"
                s_w = tf.get_default_graph().get_tensor_by_name(w_tensor_name)
                # Contract the filter axis of last_conv (N x lh x lw x F)
                # against the F axis of the (F x C) softmax weights, giving
                # an (N x lh x lw x C) class-activation map. (The original
                # expanded s_w to 1x1xFxC first, which left two stray
                # singleton axes in the tensordot output.)
                cam = tf.tensordot(last_conv, s_w, [[3], [0]],
                                   name='cam_out')

            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
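# Usage sketch (added): exercising the gap_mask placeholder of the
# get_slim_arch_bn variant directly above. Feeding a feed_dict key by string
# name is standard TF 1.x; the exact name 'vgg_16/gap_mask:0' is an educated
# guess from the scopes above, so verify it against your graph (e.g. by
# scanning tf.get_default_graph().get_operations(); the CAM tensor lives
# under the 'vgg_16/raw_CAM' name scope the same way).
def _demo_gap_mask():
    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, [None, 224, 224, 3])
        is_train = tf.placeholder_with_default(False, [])
        logits, _ = get_slim_arch_bn(images, is_train, num_classes=10)
        x = np.zeros((1, 224, 224, 3), np.float32)
        mask = np.ones((1, 64), np.float32)  # filters=64 in the function
        mask[0, 10:20] = 0.0                 # ablate a band of channels
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            full = sess.run(logits, {images: x})
            ablated = sess.run(logits,
                               {images: x, 'vgg_16/gap_mask:0': mask})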
def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19'):
    """Oxford Net VGG 19-Layers version E, truncated to a feature extractor.

    Note: the fully connected head has been removed; this variant returns
    the activations of the five convolutional blocks instead of logits, so
    num_classes, is_training, dropout_keep_prob and spatial_squeeze are
    unused (kept for signature compatibility). To use in classification
    mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: unused in this variant.
      is_training: unused in this variant.
      dropout_keep_prob: unused in this variant.
      spatial_squeeze: unused in this variant.
      scope: Optional scope for the variables.

    Returns:
      a list with the output of each convolutional block (conv1..conv5),
      taken before the corresponding max-pool.
    """
    with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected,
             layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            feats = []
            net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3],
                                    scope='conv1')
            feats.append(net)
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3],
                                    scope='conv2')
            feats.append(net)
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3],
                                    scope='conv3')
            feats.append(net)
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3],
                                    scope='conv4')
            feats.append(net)
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3],
                                    scope='conv5')
            feats.append(net)
            return feats
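# Usage sketch (added): this vgg_19 variant returns the five conv-block
# activations instead of logits, which is the shape you want for perceptual
# losses or multi-scale feature matching. Sizes below assume a 224x224 input.
def _demo_vgg19_feats():
    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, [1, 224, 224, 3])
        feats = vgg_19(images)
        for f in feats:
            print(f.name, f.shape)
        # Expected spatial sizes: conv1 224x224x64, conv2 112x112x128,
        # conv3 56x56x256, conv4 28x28x512, conv5 14x14x512.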
# Fragment: assumes `images`, `labels` and `number_of_classes` are defined by
# the surrounding script; the layers between conv1 and fc8 appear to have
# been elided in the original.
with slim.arg_scope(vgg.vgg_arg_scope()):
    # logits, end_points = vgg.vgg_16(images, num_classes=3,
    #                                 spatial_squeeze=False)
    with tf.variable_scope('vgg_16', 'vgg_16', [images]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [layers.conv2d, layers_lib.fully_connected,
             layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            dropout_keep_prob = 0.5  # unused in this fragment
            is_training = True       # unused in this fragment
            net = layers_lib.repeat(images, 2, layers.conv2d, 64, [3, 3],
                                    scope='conv1')
            net = layers.conv2d(net, number_of_classes, [1, 1],
                                activation_fn=None, normalizer_fn=None,
                                scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            logits = net
            print(logits.get_shape())
            print(labels.shape)
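# Sketch (added): the fragment above rebuilds fc8 with `number_of_classes`
# outputs, the usual fine-tuning move; when restoring pretrained vgg_16
# weights you would then exclude fc8. A minimal slim-style restore helper;
# the checkpoint path is illustrative.
def _restore_without_fc8(sess, ckpt_path):
    # ckpt_path would point at a downloaded vgg_16.ckpt
    variables_to_restore = slim.get_variables_to_restore(
        exclude=['vgg_16/fc8'])
    tf.train.Saver(variables_to_restore).restore(sess, ckpt_path)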
def _create_inference(self, inputs, is_training=True, dropout_keep_prob=0.5):
    """Define the inference model for the network.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        is_training: whether or not the model is being trained (controls
            the dropout layers).
        dropout_keep_prob: the probability that activations are kept in the
            dropout layers during training.

    Returns:
        logits: unscaled class scores of size
            [batch_size, h, w, self.lbls_dim].
    """
    if self.model_version == 'VGG16':
        with tf.variable_scope('vgg_16'):
            with framework.arg_scope(
                [layers.conv2d, layers_lib.fully_connected,
                 layers_lib.max_pool2d]):
                # From tensorflow.contrib.slim.nets.vgg.vgg_16
                net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3],
                                        scope='conv1')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
                net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3],
                                        scope='conv2')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
                net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3],
                                        scope='conv3')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
                net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3],
                                        scope='conv4')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
                net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3],
                                        scope='conv5')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')

                # Custom fc layers to allow variable input image size
                net = layers.conv2d(net, 4096, self.fc6_dims,
                                    padding='VALID', scope='fc6')
                net = layers_lib.dropout(net, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(net, self.lbls_dim, [1, 1],
                                    activation_fn=None, normalizer_fn=None,
                                    scope='fc8')
                logits = net

    elif self.model_version == 'VGG19':
        with tf.variable_scope('vgg_19'):
            with framework.arg_scope(
                [layers.conv2d, layers_lib.fully_connected,
                 layers_lib.max_pool2d]):
                # From tensorflow.contrib.slim.nets.vgg.vgg_19
                net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3],
                                        scope='conv1')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
                net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3],
                                        scope='conv2')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
                net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3],
                                        scope='conv3')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
                net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3],
                                        scope='conv4')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
                net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3],
                                        scope='conv5')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')

                # Custom fc layers to allow variable input image size
                net = layers.conv2d(net, 4096, self.fc6_dims,
                                    padding='VALID', scope='fc6')
                net = layers_lib.dropout(net, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net, dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(net, self.lbls_dim, [1, 1],
                                    activation_fn=None, normalizer_fn=None,
                                    scope='fc8')
                logits = net

    return logits
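# Sketch (added): self.fc6_dims in _create_inference sets the fc6 kernel so
# the network accepts other input sizes. After five 2x2 max-pools the conv5
# output is (H/32, W/32), so fc6_dims should match that. A hypothetical
# helper making the arithmetic explicit:
def fc6_dims_for_input(height, width):
    # Five stride-2 pools divide each spatial dimension by 32.
    return [height // 32, width // 32]

# fc6_dims_for_input(224, 224) == [7, 7], the classic VGG fc6 kernel.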
def inference_vanilla_cnn(images, in_training=True):
    inputs = images
    scope = ''
    with variable_scope.variable_scope(scope, 'cifar10', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, max_pool2d
        with arg_scope(
            [layers.conv2d, layers.fully_connected, layers_lib.max_pool2d,
             layers.batch_norm],
                outputs_collections=end_points_collection):
            # Apply specific parameters to all conv2d layers
            # (to use batch norm and relu - relu is by default)
            with arg_scope(
                [layers.conv2d, layers.fully_connected],
                weights_regularizer=regularizers.l2_regularizer(0.0005),
                weights_initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False),
                biases_initializer=None,
                normalizer_fn=tf.layers.batch_normalization,
                normalizer_params=dict(
                    center=True,
                    scale=True,
                    # momentum=BATCHNORM_MOVING_AVERAGE_DECAY,  # Decay for the moving averages.
                    epsilon=0.001,  # epsilon to prevent 0s in variance.
                    training=in_training)):
                net = layers_lib.repeat(inputs, 1, layers.conv2d, 32, [3, 3],
                                        scope='conv1')
                _activation_summary(net)
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
                net = layers_lib.repeat(net, 1, layers.conv2d, 32, [3, 3],
                                        scope='conv2')
                _activation_summary(net)
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
                net = layers_lib.repeat(net, 1, layers.conv2d, 32, [3, 3],
                                        scope='conv3')
                _activation_summary(net)
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')

                net = tf.reshape(net, [net.shape[0], -1])
                net = layers.fully_connected(
                    net,
                    NUM_CLASSES,
                    scope='fc4',
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.constant_initializer(0))
    return net
def inference(images, in_training=True):
    # dau_conv2d and DAUGridMean are assumed to come from the external
    # DAU-ConvNet package (e.g. `from dau_conv import dau_conv2d,
    # DAUGridMean`); they are not part of tf.contrib.
    inputs = images
    scope = ''
    with variable_scope.variable_scope(scope, 'cifar10', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, max_pool2d
        with arg_scope(
            [layers.conv2d, dau_conv2d, layers.fully_connected,
             layers_lib.max_pool2d, layers.batch_norm],
                outputs_collections=end_points_collection):
            # Apply specific parameters to all conv2d layers
            # (to use batch norm and relu - relu is by default)
            with arg_scope(
                [layers.conv2d, dau_conv2d, layers.fully_connected],
                weights_regularizer=regularizers.l2_regularizer(0.1),
                weights_initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False),
                # weights_initializer=lambda shape, dtype=tf.float32, partition_info=None: tf.contrib.layers.xavier_initializer(uniform=False),
                biases_initializer=None,
                # normalizer_fn=layers.batch_norm,
                # normalizer_params={'center': True,
                #                    'scale': True,
                #                    # 'is_training': in_training,
                #                    'decay': BATCHNORM_MOVING_AVERAGE_DECAY,  # Decay for the moving averages.
                #                    'epsilon': 0.001,  # epsilon to prevent 0s in variance.
                #                    'data_format': 'NCHW',
                #                    },
                normalizer_fn=tf.layers.batch_normalization,
                normalizer_params=dict(
                    center=True,
                    scale=True,
                    # momentum=BATCHNORM_MOVING_AVERAGE_DECAY,  # Decay for the moving averages.
                    epsilon=0.001,  # epsilon to prevent 0s in variance.
                    axis=1,
                    training=in_training)):
                inputs = tf.transpose(inputs, [0, 3, 1, 2])
                print("input: ", inputs.shape)
                net = layers_lib.repeat(
                    inputs, 1, dau_conv2d, 96,
                    dau_units=(2, 2),
                    max_kernel_size=25,
                    mu2_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=1),
                    mu1_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=2),
                    # mu2_initializer=tf.constant_initializer(0),
                    # mu1_initializer=tf.constant_initializer(0),
                    dau_unit_border_bound=0.0,
                    mu_learning_rate_factor=1,
                    data_format='NCHW',
                    scope='dau_conv1')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool1',
                                            data_format="NCHW")
                #'''
                net = layers_lib.repeat(
                    net, 1, dau_conv2d, 96,
                    dau_units=(2, 2),
                    max_kernel_size=12,
                    mu2_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=1),
                    mu1_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=2),
                    # mu2_initializer=tf.constant_initializer(0),
                    # mu1_initializer=tf.constant_initializer(0),
                    dau_unit_border_bound=0.0,
                    mu_learning_rate_factor=1,
                    data_format='NCHW',
                    scope='dau_conv2')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool2',
                                            data_format="NCHW")
                net = layers_lib.repeat(
                    net, 1, dau_conv2d, 192,
                    dau_units=(2, 2),
                    max_kernel_size=12,
                    mu2_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=1),
                    mu1_initializer=DAUGridMean(dau_units=(2, 2),
                                                max_value=4,
                                                dau_unit_axis=2),
                    # mu2_initializer=tf.constant_initializer(0),
                    # mu1_initializer=tf.constant_initializer(0),
                    dau_unit_border_bound=0.0,
                    mu_learning_rate_factor=1,
                    data_format='NCHW',
                    scope='dau_conv3')
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool3',
                                            data_format="NCHW")
                #'''
                net = tf.reshape(net, [net.shape[0], -1])
                net = layers.fully_connected(
                    net,
                    NUM_CLASSES,
                    scope='fc4',
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.constant_initializer(0))
    return net
def get_slim_arch_bn(inputs, isTrainTensor, num_classes=1000, scope='vgg_16'):
    """VGG-16 with batch norm after each conv block and a global-average-pool
    head, adapted from
    https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py

    :param inputs: a tensor of size [batch_size, height, width, channels]
    :param isTrainTensor: bool tensor selecting batch-norm train/inference
        mode
    :param num_classes: number of predicted classes
    :param scope: optional scope for the variables
    :return: the logits and an end_points dict
    """
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        # Arg scope sets default parameters for a list of ops
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected,
             layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3],
                                    scope='conv1')
            net = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')

            net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3],
                                    scope='conv2')
            net = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')

            net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3],
                                    scope='conv3')
            net = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')

            net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3],
                                    scope='conv4')
            net = tf.contrib.layers.batch_norm(
                net, center=True, scale=True, is_training=isTrainTensor,
                scope='bn4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')

            net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3],
                                    scope='conv5')

            # Here we have 14x14 filters
            net = tf.reduce_mean(net, [1, 2])  # Global average pooling

            net = layers_lib.fully_connected(
                net, num_classes, activation_fn=None,
                biases_initializer=None, scope='softmax_logits')

            # Convert end_points_collection into an end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
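# Training sketch (added): tf.contrib.layers.batch_norm, as used in both
# get_slim_arch_bn variants, registers its moving-average updates in
# tf.GraphKeys.UPDATE_OPS by default, so the train op must depend on them or
# the inference-time statistics never move. The same applies to
# tf.layers.batch_normalization in the cifar10 snippets above. Loss and
# optimizer here are illustrative stand-ins.
def _make_train_op(loss, learning_rate=1e-3):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        return tf.train.AdamOptimizer(learning_rate).minimize(loss)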