def vgg_dual_16(inputs1, inputs2, num_classes=1000, is_training=True,
                dropout_keep_prob=0.5, spatial_squeeze=True, scope='vgg_16',
                update_top_only=False, fc_conv_padding='VALID', reuse=False):
  with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs1]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      nets = []
      for i, inputs in enumerate([inputs1, inputs2]):
        # The second stream reuses the weights of the first.
        with slim.arg_scope(vgg_arg_scope(reuse=reuse or (i > 0))):
          net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
          net = slim.max_pool2d(net, [2, 2], scope='pool1')
          net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
          net = slim.max_pool2d(net, [2, 2], scope='pool2')
          net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
          net = slim.max_pool2d(net, [2, 2], scope='pool3')
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
          net = slim.max_pool2d(net, [2, 2], scope='pool4')
          if update_top_only:
            # Freeze the shared VGG trunk and only train the fusion head.
            net = tf.stop_gradient(net)
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
          nets.append(net)
      with slim.arg_scope(vgg_arg_scope(reuse=reuse)):
        # Fuse the two streams along the channel dimension.
        net = tf.concat(nets, 3)
        net = slim.conv2d(net, 512, [1, 1], scope='conv6')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool6')
        net = slim.conv2d(net, 512, [1, 1], scope='conv7')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool7')
        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 2048, [7, 7], padding=fc_conv_padding,
                          scope='fc6_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 2048, [1, 1], scope='fc7_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7_')
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='fc8')
        # Convert end_points_collection into an end_point dict.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
          end_points[sc.name + '/fc8'] = net
        return net, end_points
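# Illustrative usage sketch (not part of the original source): builds the
# dual-stream network on two image placeholders. Assumes `vgg_arg_scope` is
# defined in this module, as the function above requires. The 448x448 input
# size is chosen so that the 7x7 VALID 'fc6_' convolution collapses the
# fused feature map to 1x1, letting the spatial squeeze succeed.
def _vgg_dual_16_example():
  images_a = tf.compat.v1.placeholder(tf.float32, [None, 448, 448, 3])
  images_b = tf.compat.v1.placeholder(tf.float32, [None, 448, 448, 3])
  logits, end_points = vgg_dual_16(images_a, images_b,
                                   num_classes=1000, is_training=False)
  return logits, end_points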
def define_vggish_slim(training=False):
  """Defines the VGGish TensorFlow model.

  All ops are created in the current default graph, under the scope
  'vggish/'.

  The input is a placeholder named 'vggish/input_features' of type float32
  and shape [batch_size, num_frames, num_bands] where batch_size is variable
  and num_frames and num_bands are constants, and [num_frames, num_bands]
  represents a log-mel-scale spectrogram patch covering num_bands frequency
  bands and num_frames time frames (where each frame step is usually 10ms).
  This is produced by computing the stabilized
  log(mel-spectrogram + params.LOG_OFFSET).

  The output is an op named 'vggish/embedding' which produces the activations
  of a 128-D embedding layer, which is usually the penultimate layer when
  used as part of a full model with a final classifier layer.

  Args:
    training: If true, all parameters are marked trainable.

  Returns:
    The op 'vggish/embedding'.
  """
  # Defaults:
  # - All weights are initialized to N(0, INIT_STDDEV).
  # - All biases are initialized to 0.
  # - All activations are ReLU.
  # - All convolutions are 3x3 with stride 1 and SAME padding.
  # - All max-pools are 2x2 with stride 2 and SAME padding.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_initializer=tf.truncated_normal_initializer(
                          stddev=params.INIT_STDDEV),
                      biases_initializer=tf.zeros_initializer(),
                      activation_fn=tf.nn.relu,
                      trainable=training), \
       slim.arg_scope([slim.conv2d],
                      kernel_size=[3, 3], stride=1, padding='SAME'), \
       slim.arg_scope([slim.max_pool2d],
                      kernel_size=[2, 2], stride=2, padding='SAME'), \
       tf.variable_scope('vggish'):
    # Input: a batch of 2-D log-mel-spectrogram patches.
    features = tf.placeholder(
        tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
        name='input_features')
    # Reshape to 4-D so that we can convolve a batch with conv2d().
    net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

    # The VGG stack of alternating convolutions and max-pools.
    net = slim.conv2d(net, 64, scope='conv1')
    net = slim.max_pool2d(net, scope='pool1')
    net = slim.conv2d(net, 128, scope='conv2')
    net = slim.max_pool2d(net, scope='pool2')
    net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
    net = slim.max_pool2d(net, scope='pool3')
    net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
    net = slim.max_pool2d(net, scope='pool4')

    # Flatten before entering fully-connected layers.
    net = slim.flatten(net)
    net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
    # The embedding layer.
    net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
    return tf.identity(net, name='embedding')
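# Illustrative usage sketch (not part of the original source): builds the
# VGGish graph and embeds a single random log-mel patch. Assumes `numpy`
# is imported as `np` and that `params` is the VGGish hyperparameter module
# used above.
def _vggish_example():
  with tf.Graph().as_default(), tf.Session() as sess:
    embedding = define_vggish_slim(training=False)
    sess.run(tf.global_variables_initializer())
    patch = np.random.rand(1, params.NUM_FRAMES, params.NUM_BANDS)
    return sess.run(embedding, feed_dict={'vggish/input_features:0': patch})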
def build_head(self, is_training):
  # Main network
  # Layer 1
  net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                    trainable=False, scope='conv1')
  net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
  # Layer 2
  net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                    trainable=False, scope='conv2')
  net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
  # Layer 3
  net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                    trainable=is_training, scope='conv3')
  net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
  # Layer 4
  net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                    trainable=is_training, scope='conv4')
  net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
  # Layer 5
  net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                    trainable=is_training, scope='conv5')

  # Append network to summaries.
  self._act_summaries.append(net)
  # Append network as head layer.
  self._layers['head'] = net
  return net
def vgg_19(inputs,
           num_classes=1000,
           is_training=False,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           reuse=False,
           fc_conv_padding='VALID'):
  """Oxford Net VGG 19-Layers version E, truncated after pool5.

  Note: in this variant the fully connected layers have been removed; the
  network is built only up to 'pool5', so it returns convolutional features
  rather than class predictions. To use with the standard architecture,
  resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes (unused in this truncated
      variant).
    is_training: whether or not the model is being trained (unused here).
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training (unused here).
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs (unused here).
    scope: Optional scope for the variables.
    reuse: whether or not the network and its variables should be reused.
    fc_conv_padding: the type of padding to use for the fully connected
      layer implemented as a convolutional layer (unused here).

  Returns:
    the 'pool5' feature map and the end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1',
                        reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2',
                        reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3',
                        reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4',
                        reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5',
                        reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      return net, end_points
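# Illustrative usage sketch (not part of the original source): because this
# variant stops at pool5, it works as a feature extractor, e.g. for
# perceptual losses. End-point key names assume the slim collection naming
# under the default 'vgg_19' scope.
def _vgg_19_features_example(images):
  # images: [batch, 224, 224, 3] -> pool5 features: [batch, 7, 7, 512].
  features, end_points = vgg_19(images)
  conv4 = end_points['vgg_19/conv4/conv4_4']  # an intermediate activation
  return features, conv4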
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
  """Extracts second stage box classifier features.

  This function reconstructs the "second half" of the Inception ResNet v2
  network after the part defined in `_extract_proposal_features`.

  Args:
    proposal_feature_maps: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
      representing the feature map cropped to each proposal.
    scope: A scope name.

  Returns:
    proposal_classifier_features: A 4-D float tensor with shape
      [batch_size * self.max_num_proposals, height, width, depth]
      representing box classifier features for each proposal.
  """
  with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
    with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
        weight_decay=self._weight_decay)):
      # Forces is_training to False to disable batch norm update.
      with slim.arg_scope([slim.batch_norm],
                          is_training=self._train_batch_norm):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
          with tf.variable_scope('Mixed_7a'):
            with tf.variable_scope('Branch_0'):
              tower_conv = slim.conv2d(proposal_feature_maps, 256, 1,
                                       scope='Conv2d_0a_1x1')
              tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
              tower_conv1 = slim.conv2d(proposal_feature_maps, 256, 1,
                                        scope='Conv2d_0a_1x1')
              tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                          padding='VALID',
                                          scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
              tower_conv2 = slim.conv2d(proposal_feature_maps, 256, 1,
                                        scope='Conv2d_0a_1x1')
              tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                          scope='Conv2d_0b_3x3')
              tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                          padding='VALID',
                                          scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_3'):
              tower_pool = slim.max_pool2d(proposal_feature_maps, 3,
                                           stride=2, padding='VALID',
                                           scope='MaxPool_1a_3x3')
            net = tf.concat(
                [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
          net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20)
          net = inception_resnet_v2.block8(net, activation_fn=None)
          proposal_classifier_features = slim.conv2d(net, 1536, 1,
                                                     scope='Conv2d_7b_1x1')
          return proposal_classifier_features
def vgg_16(inputs, scope='vgg_16'):
  with tf.variable_scope(scope, 'vgg_16', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.fully_connected,
                         slim.max_pool2d]):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      return net
def vgg_16_base(inputs,
                is_training=True,
                scope='vgg_16',
                fc_conv_padding='VALID',
                final_endpoint=None):
  """VGG-16 base model.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    is_training: whether or not the model is being trained (unused in this
      base network).
    scope: Optional scope for the variables.
    fc_conv_padding: padding type for the fully connected layers implemented
      as convolutions, 'SAME' or 'VALID' (unused in this base network).
    final_endpoint: if set to 'pool4' or 'conv5_2', construction stops at
      that endpoint and the network built so far is returned.

  Returns:
    net: the VGG network output.
    end_points: a dict of tensors with intermediate activations.
  """
  end_points = {}
  with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      end_point = 'pool4'
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      end_point = 'conv5_2'
      end_points[end_point] = net
      if end_point == final_endpoint:
        return net, end_points
      net = slim.repeat(net, 1, slim.conv2d, 512, [3, 3], scope='conv5_3')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      return net, end_points
def create_network(inputs, is_training, scope="win19_dep9", reuse=False):
  num_maps = 64
  kw = 3
  kh = 3
  with tf.variable_scope(scope, reuse=reuse):
    with slim.arg_scope([slim.conv2d],
                        padding='VALID',
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training}):
      net = slim.conv2d(inputs, num_maps, [kh, kw], scope='conv_bn_relu1')
      net = slim.repeat(net, 7, slim.conv2d, num_maps, [kh, kw],
                        scope='conv_bn_relu2_8')
      net = slim.conv2d(net, num_maps, [kh, kw], scope='conv9',
                        activation_fn=None, normalizer_fn=None)
      net = slim.batch_norm(net, is_training=is_training)
  return net
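# Illustrative note (not part of the original source): nine stacked 3x3
# VALID convolutions shrink each spatial dimension by 2 per layer, 18 in
# total, so a 19x19 input patch maps to a single output position, matching
# the "win19_dep9" name.
def _win19_dep9_example():
  patches = tf.placeholder(tf.float32, [None, 19, 19, 1])
  return create_network(patches, is_training=False)  # -> [None, 1, 1, 64]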
def call(self, inputs, step_type=None, network_states=()):
  del step_type  # unused.
  states, goals = inputs
  with slim.arg_scope([slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=NORMALIZER_FN,
                      normalizer_params=NORMALIZER_PARAMS):
    with tf.compat.v1.variable_scope('encode_dynamics'):
      positions = states['position']
      next_positions = goals['position']
      delta_positions = tf.subtract(next_positions, positions,
                                    'delta_positions')
      net = tf.concat([positions, delta_positions], axis=-1)
      net = slim.repeat(net, 2, slim.fully_connected, self._dim_fc_state,
                        scope='fc')
      dynamics_feats = tf.identity(net, 'dynamics_feats')

    with tf.compat.v1.variable_scope('global_pool'):
      dynamics_feats = global_pool(dynamics_feats,
                                   axis=1,
                                   mask=states['body_mask'],
                                   mode='reduce_sum')

    with tf.compat.v1.variable_scope('inference'):
      net = dynamics_feats
      net = slim.fully_connected(net, self._dim_fc_state, scope='fc')
      gaussian_params = slim.fully_connected(net,
                                             2 * self._dim_c,
                                             activation_fn=None,
                                             normalizer_fn=None,
                                             scope='gaussian_params')
      c_means = tf.identity(gaussian_params[..., :self._dim_c],
                            name='c_means')
      c_stddevs = tf.add(tf.nn.softplus(gaussian_params[..., self._dim_c:]),
                         1e-6,
                         name='c_stddevs')
  return c_means, c_stddevs
def _decoder_with_concat_merge(decoder_features_list,
                               decoder_depth,
                               decoder_use_separable_conv=True,
                               weight_decay=0.0001,
                               scope_suffix=''):
  """Decoder with concatenation to merge features.

  This decoder method applies two convolutions to smooth the features
  obtained by concatenating the input decoder_features_list.

  This decoder module is proposed in the DeepLabv3+ paper.

  Args:
    decoder_features_list: A list of decoder features.
    decoder_depth: Integer, the filters used in the convolution.
    decoder_use_separable_conv: Boolean, use separable conv or not.
    weight_decay: Weight decay for the model variables.
    scope_suffix: String, used in the scope suffix.

  Returns:
    decoder features merged with concatenation.
  """
  if decoder_use_separable_conv:
    decoder_features = split_separable_conv2d(
        tf.concat(decoder_features_list, 3),
        filters=decoder_depth,
        rate=1,
        weight_decay=weight_decay,
        scope='decoder_conv0' + scope_suffix)
    decoder_features = split_separable_conv2d(
        decoder_features,
        filters=decoder_depth,
        rate=1,
        weight_decay=weight_decay,
        scope='decoder_conv1' + scope_suffix)
  else:
    num_convs = 2
    decoder_features = slim.repeat(
        tf.concat(decoder_features_list, 3),
        num_convs,
        slim.conv2d,
        decoder_depth,
        3,
        scope='decoder_conv' + scope_suffix)
  return decoder_features
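# Illustrative usage sketch (not part of the original source): merges an
# upsampled feature map with low-level features using the plain-conv path;
# the separable path additionally requires `split_separable_conv2d` from
# this codebase.
def _decoder_merge_example(low_level_features, upsampled_features):
  return _decoder_with_concat_merge(
      [low_level_features, upsampled_features],
      decoder_depth=256,
      decoder_use_separable_conv=False)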
def encode_relation(positions, body_masks, dim_fc_state):
  """Encode the relation feature.

  Args:
    positions: Positions of the bodies.
    body_masks: Masks of valid bodies.
    dim_fc_state: Dimension of state encoding.

  Returns:
    A tensor of shape [batch_size, num_bodies, dim_fc_state].
  """
  with slim.arg_scope([slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=NORMALIZER_FN,
                      normalizer_params=NORMALIZER_PARAMS):
    with tf.compat.v1.variable_scope('relation_masks'):
      body_masks = tf.identity(body_masks, 'body_masks')
      # Pairwise mask that is 1 for pairs of distinct valid bodies.
      relation_masks = tf.subtract(
          tf.multiply(tf.expand_dims(body_masks, -1),
                      tf.expand_dims(body_masks, -2)),
          tf.linalg.diag(body_masks))
      relation_masks = tf.expand_dims(relation_masks, axis=-1)

    with tf.compat.v1.variable_scope('relation_feats'):
      # Pairwise position differences, encoded by a two-layer MLP.
      net = tf.subtract(tf.expand_dims(positions, axis=1),
                        tf.expand_dims(positions, axis=2))
      net = slim.repeat(net, 2, slim.fully_connected, dim_fc_state,
                        scope='fc')
      relation_feats = net
  return tf.reduce_sum(relation_feats * relation_masks,
                       axis=1,
                       name='sum_relation_feats')
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a',
          fc_conv_padding='VALID',
          global_pool=False):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d
  layers. To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer are returned
      instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of
      the outputs. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected
      layer that is implemented as a convolutional layer. Use 'SAME' padding
      if you are applying the network in a fully convolutional manner and
      want to get a prediction map downsampled by a factor of 32 as an
      output. Otherwise, the output prediction map will be (input / 32) - 6
      in case of 'VALID' padding.
    global_pool: Optional boolean flag. If True, the input to the
      classification layer is avgpooled to size 1x1, for any input size.
      (This is not part of the original VGG architecture.)

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero
      integer), or the input to the logits layer (if num_classes is 0 or
      None).
    end_points: a dict of tensors with intermediate activations.
  """
  with tf.compat.v1.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')

      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding,
                        scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      if global_pool:
        net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True,
                             name='global_pool')
        end_points['global_pool'] = net
      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
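# Illustrative usage sketch (not part of the original source): standard
# classification use of vgg_a with 224x224 inputs, where the squeezed fc8
# logits have shape [batch, num_classes].
def _vgg_a_example(images, one_hot_labels):
  logits, _ = vgg_a(images, num_classes=1000, is_training=True)
  loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels,
                                                 logits=logits))
  return loss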
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
  """Creates the Inception Resnet V1 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    bottleneck_layer_size: int, dimensionality of the bottleneck embedding.
    reuse: whether or not the network and its variables should be reused.
      To be able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: the bottleneck embedding of the model.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}

  with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # 149 x 149 x 32
        net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                          scope='Conv2d_1a_3x3')
        end_points['Conv2d_1a_3x3'] = net
        # 147 x 147 x 32
        net = slim.conv2d(net, 32, 3, padding='VALID',
                          scope='Conv2d_2a_3x3')
        end_points['Conv2d_2a_3x3'] = net
        # 147 x 147 x 64
        net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
        end_points['Conv2d_2b_3x3'] = net
        # 73 x 73 x 64
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_3a_3x3')
        end_points['MaxPool_3a_3x3'] = net
        # 73 x 73 x 80
        net = slim.conv2d(net, 80, 1, padding='VALID',
                          scope='Conv2d_3b_1x1')
        end_points['Conv2d_3b_1x1'] = net
        # 71 x 71 x 192
        net = slim.conv2d(net, 192, 3, padding='VALID',
                          scope='Conv2d_4a_3x3')
        end_points['Conv2d_4a_3x3'] = net
        # 35 x 35 x 256
        net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
                          scope='Conv2d_4b_3x3')
        end_points['Conv2d_4b_3x3'] = net

        # 5 x Inception-resnet-A
        net = slim.repeat(net, 5, block35, scale=0.17)
        # Reduction-A
        with tf.variable_scope('Mixed_6a'):
          net = reduction_a(net, 192, 192, 256, 384)
        end_points['Mixed_6a'] = net
        # 10 x Inception-Resnet-B
        net = slim.repeat(net, 10, block17, scale=0.10)
        # Reduction-B
        with tf.variable_scope('Mixed_7a'):
          net = reduction_b(net)
        end_points['Mixed_7a'] = net
        # 5 x Inception-Resnet-C
        net = slim.repeat(net, 5, block8, scale=0.20)
        net = block8(net, activation_fn=None)

        with tf.variable_scope('Logits'):
          end_points['PrePool'] = net
          # pylint: disable=no-member
          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                scope='AvgPool_1a_8x8')
          net = slim.flatten(net)
          net = slim.dropout(net, dropout_keep_prob,
                             is_training=is_training, scope='Dropout')
          end_points['PreLogitsFlatten'] = net
        net = slim.fully_connected(net, bottleneck_layer_size,
                                   activation_fn=None, scope='Bottleneck',
                                   reuse=False)
  return net, end_points
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
  """Creates the Inception Resnet V2 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    bottleneck_layer_size: int, dimensionality of the bottleneck embedding.
    reuse: whether or not the network and its variables should be reused.
      To be able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: the bottleneck embedding of the model.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}

  with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # 149 x 149 x 32
        net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                          scope='Conv2d_1a_3x3')
        end_points['Conv2d_1a_3x3'] = net
        # 147 x 147 x 32
        net = slim.conv2d(net, 32, 3, padding='VALID',
                          scope='Conv2d_2a_3x3')
        end_points['Conv2d_2a_3x3'] = net
        # 147 x 147 x 64
        net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
        end_points['Conv2d_2b_3x3'] = net
        # 73 x 73 x 64
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_3a_3x3')
        end_points['MaxPool_3a_3x3'] = net
        # 73 x 73 x 80
        net = slim.conv2d(net, 80, 1, padding='VALID',
                          scope='Conv2d_3b_1x1')
        end_points['Conv2d_3b_1x1'] = net
        # 71 x 71 x 192
        net = slim.conv2d(net, 192, 3, padding='VALID',
                          scope='Conv2d_4a_3x3')
        end_points['Conv2d_4a_3x3'] = net
        # 35 x 35 x 192
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_5a_3x3')
        end_points['MaxPool_5a_3x3'] = net

        # 35 x 35 x 320
        with tf.variable_scope('Mixed_5b'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
          with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                        scope='Conv2d_0b_5x5')
          with tf.variable_scope('Branch_2'):
            tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                        scope='Conv2d_0c_3x3')
          with tf.variable_scope('Branch_3'):
            tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                         scope='AvgPool_0a_3x3')
            tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                       scope='Conv2d_0b_1x1')
          net = tf.concat(
              [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)
        end_points['Mixed_5b'] = net
        net = slim.repeat(net, 10, block35, scale=0.17)

        # 17 x 17 x 1024
        with tf.variable_scope('Mixed_6a'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                     scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, stride=2,
                                        padding='VALID',
                                        scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_2'):
            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                         scope='MaxPool_1a_3x3')
          net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
        end_points['Mixed_6a'] = net
        net = slim.repeat(net, 20, block17, scale=0.10)

        with tf.variable_scope('Mixed_7a'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                       padding='VALID',
                                       scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_1'):
            tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                        padding='VALID',
                                        scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_2'):
            tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                        padding='VALID',
                                        scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_3'):
            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                         scope='MaxPool_1a_3x3')
          net = tf.concat(
              [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
        end_points['Mixed_7a'] = net

        net = slim.repeat(net, 9, block8, scale=0.20)
        net = block8(net, activation_fn=None)

        net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
        end_points['Conv2d_7b_1x1'] = net

        with tf.variable_scope('Logits'):
          end_points['PrePool'] = net
          # pylint: disable=no-member
          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                scope='AvgPool_1a_8x8')
          net = slim.flatten(net)
          net = slim.dropout(net, dropout_keep_prob,
                             is_training=is_training, scope='Dropout')
          end_points['PreLogitsFlatten'] = net
        net = slim.fully_connected(net, bottleneck_layer_size,
                                   activation_fn=None, scope='Bottleneck',
                                   reuse=False)
  return net, end_points
def vgg_16(inputs, reuse=False, pooling='avg', final_endpoint='fc8'):
  """VGG-16 implementation intended for test-time use.

  It takes inputs with values in [0, 1] and preprocesses them (scaling,
  mean-centering) before feeding them to the VGG-16 network.

  Args:
    inputs: A 4-D tensor of shape [batch_size, image_size, image_size, 3]
      and dtype float32, with values in [0, 1].
    reuse: bool. Whether to reuse model parameters. Defaults to False.
    pooling: str in {'avg', 'max'}, which pooling operation to use. Defaults
      to 'avg'.
    final_endpoint: str, specifies the endpoint to construct the network up
      to. Defaults to 'fc8'.

  Returns:
    A dict mapping end-point names to their corresponding Tensor.

  Raises:
    ValueError: the final_endpoint argument is not recognized.
  """
  inputs *= 255.0
  inputs -= tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)

  pooling_fns = {'avg': slim.avg_pool2d, 'max': slim.max_pool2d}
  pooling_fn = pooling_fns[pooling]

  with tf.variable_scope('vgg_16', [inputs], reuse=reuse) as sc:
    end_points = {}

    def add_and_check_is_final(layer_name, net):
      end_points['%s/%s' % (sc.name, layer_name)] = net
      return layer_name == final_endpoint

    with slim.arg_scope([slim.conv2d], trainable=False):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      if add_and_check_is_final('conv1', net):
        return end_points
      net = pooling_fn(net, [2, 2], scope='pool1')
      if add_and_check_is_final('pool1', net):
        return end_points
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      if add_and_check_is_final('conv2', net):
        return end_points
      net = pooling_fn(net, [2, 2], scope='pool2')
      if add_and_check_is_final('pool2', net):
        return end_points
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      if add_and_check_is_final('conv3', net):
        return end_points
      net = pooling_fn(net, [2, 2], scope='pool3')
      if add_and_check_is_final('pool3', net):
        return end_points
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      if add_and_check_is_final('conv4', net):
        return end_points
      net = pooling_fn(net, [2, 2], scope='pool4')
      if add_and_check_is_final('pool4', net):
        return end_points
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      if add_and_check_is_final('conv5', net):
        return end_points
      net = pooling_fn(net, [2, 2], scope='pool5')
      if add_and_check_is_final('pool5', net):
        return end_points
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      if add_and_check_is_final('fc6', net):
        return end_points
      net = slim.dropout(net, 0.5, is_training=False, scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      if add_and_check_is_final('fc7', net):
        return end_points
      net = slim.dropout(net, 0.5, is_training=False, scope='dropout7')
      net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, scope='fc8')
      end_points[sc.name + '/predictions'] = slim.softmax(net)
      if add_and_check_is_final('fc8', net):
        return end_points

    raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
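# Illustrative usage sketch (not part of the original source): extracts
# frozen conv3 activations, a common choice for style/content losses.
# Inputs must already be scaled to [0, 1]; the function handles the rest of
# the preprocessing.
def _vgg_16_endpoint_example(images_in_01_range):
  end_points = vgg_16(images_in_01_range, final_endpoint='conv3')
  return end_points['vgg_16/conv3']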
def inception_resnet_v2_base(inputs,
                             final_endpoint='Conv2d_7b_1x1',
                             output_stride=16,
                             align_feature_maps=False,
                             scope=None,
                             activation_fn=tf.nn.relu):
  """Inception model from http://arxiv.org/abs/1602.07261.

  Constructs an Inception Resnet v2 network from inputs to the given final
  endpoint. This method can construct the network up to the final inception
  block Conv2d_7b_1x1.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
    output_stride: A scalar that specifies the requested ratio of input to
      output spatial resolution. Only supports 8 and 16.
    align_feature_maps: When true, changes all the VALID paddings in the
      network to SAME padding so that the feature maps are aligned.
    scope: Optional variable_scope.
    activation_fn: Activation function for block scopes.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries
      or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined
      values, or if the output_stride is not 8 or 16, or if the
      output_stride is 8 and we request an end point after 'PreAuxLogits'.
  """
  if output_stride != 8 and output_stride != 16:
    raise ValueError('output_stride must be 8 or 16.')

  padding = 'SAME' if align_feature_maps else 'VALID'

  end_points = {}

  def add_and_check_final(name, net):
    end_points[name] = net
    return name == final_endpoint

  with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 149 x 149 x 32
      net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding,
                        scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net):
        return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 32, 3, padding=padding, scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net):
        return net, end_points
      # 147 x 147 x 64
      net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net):
        return net, end_points
      # 73 x 73 x 64
      net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                            scope='MaxPool_3a_3x3')
      if add_and_check_final('MaxPool_3a_3x3', net):
        return net, end_points
      # 73 x 73 x 80
      net = slim.conv2d(net, 80, 1, padding=padding, scope='Conv2d_3b_1x1')
      if add_and_check_final('Conv2d_3b_1x1', net):
        return net, end_points
      # 71 x 71 x 192
      net = slim.conv2d(net, 192, 3, padding=padding, scope='Conv2d_4a_3x3')
      if add_and_check_final('Conv2d_4a_3x3', net):
        return net, end_points
      # 35 x 35 x 192
      net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                            scope='MaxPool_5a_3x3')
      if add_and_check_final('MaxPool_5a_3x3', net):
        return net, end_points

      # 35 x 35 x 320
      with tf.variable_scope('Mixed_5b'):
        with tf.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
          tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                      scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
          tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                      scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                       scope='AvgPool_0a_3x3')
          tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                     scope='Conv2d_0b_1x1')
        net = tf.concat(
            [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)

      if add_and_check_final('Mixed_5b', net):
        return net, end_points
      # TODO(alemi): Register intermediate endpoints
      net = slim.repeat(net, 10, block35, scale=0.17,
                        activation_fn=activation_fn)

      # 17 x 17 x 1088 if output_stride == 8,
      # 33 x 33 x 1088 if output_stride == 16
      use_atrous = output_stride == 8

      with tf.variable_scope('Mixed_6a'):
        with tf.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 384, 3,
                                   stride=1 if use_atrous else 2,
                                   padding=padding,
                                   scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                      stride=1 if use_atrous else 2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          tower_pool = slim.max_pool2d(net, 3,
                                       stride=1 if use_atrous else 2,
                                       padding=padding,
                                       scope='MaxPool_1a_3x3')
        net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

      if add_and_check_final('Mixed_6a', net):
        return net, end_points

      # TODO(alemi): register intermediate endpoints
      with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
        net = slim.repeat(net, 20, block17, scale=0.10,
                          activation_fn=activation_fn)
      if add_and_check_final('PreAuxLogits', net):
        return net, end_points

      if output_stride == 8:
        # TODO(gpapan): Properly support output_stride for the rest of the
        # net.
        raise ValueError('output_stride==8 is only supported up to the '
                         'PreAuxlogits end_point for now.')

      # 8 x 8 x 2080
      with tf.variable_scope('Mixed_7a'):
        with tf.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                     padding=padding, scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_3'):
          tower_pool = slim.max_pool2d(net, 3, stride=2, padding=padding,
                                       scope='MaxPool_1a_3x3')
        net = tf.concat(
            [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)

      if add_and_check_final('Mixed_7a', net):
        return net, end_points

      # TODO(alemi): register intermediate endpoints
      net = slim.repeat(net, 9, block8, scale=0.20,
                        activation_fn=activation_fn)
      net = block8(net, activation_fn=None)

      # 8 x 8 x 1536
      net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
      if add_and_check_final('Conv2d_7b_1x1', net):
        return net, end_points

    raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
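# Illustrative usage sketch (not part of the original source): builds the
# backbone up to 'PreAuxLogits' in atrous mode (output_stride=8), e.g. for
# dense prediction. Assumes `inception_resnet_v2_arg_scope` is available in
# this codebase, as in the standard slim release.
def _inception_resnet_v2_base_example(images):
  with slim.arg_scope(inception_resnet_v2_arg_scope()):
    net, end_points = inception_resnet_v2_base(
        images,
        final_endpoint='PreAuxLogits',
        output_stride=8,
        align_feature_maps=True)
  return net, end_points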
def vgg_16_hed_cam(inputs,
                   cams,
                   num_classes=1,
                   is_training=True,
                   add_v1net_early=False,
                   add_v1net=False,
                   reuse=None,
                   reduce_conv=True,
                   scope='vgg_16'):
  """VGG-16 implementation of HED with a CAM side stream.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    cams: a tensor of class activation maps added into each conv block.
    num_classes: number of output classes (side outputs are single-channel).
    is_training: whether or not the model is being trained.
    add_v1net_early: whether to add a v1net block after the first conv
      block.
    add_v1net: whether to add v1net blocks after later convolutions.
    reuse: whether or not the network and its variables should be reused.
      To be able to reuse 'scope' must be given.
    reduce_conv: whether to fuse side outputs with a learned convolution
      instead of averaging them.
    scope: Optional scope for the variables.

  Returns:
    fused_predictions: fused side-output logits at input resolution.
    end_points: a dict of tensors with intermediate activations.
  """
  side_outputs = []
  _, h, w, _ = inputs.shape.as_list()
  with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      with tf.variable_scope("cam-conv1"):
        cam_net = slim.repeat(cams, 1, slim.conv2d, 64, [3, 3],
                              scope="cam-conv1")
      net = net + cam_net
      if add_v1net_early and FLAGS.v1_timesteps:
        with tf.variable_scope("v1net-conv1"):
          v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 64
          net = build_v1net(inputs=net,
                            filters=n_filters,
                            timesteps=v1_timesteps,
                            kernel_size=v1_kernel_size,
                            is_training=is_training)
      side_outputs.append(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool1')

      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      with tf.variable_scope("cam-conv2"):
        cam_net = slim.repeat(cam_net, 1, slim.conv2d, 128, [3, 3],
                              scope="cam-conv2")
      net = net + cam_net
      if add_v1net and FLAGS.v1_timesteps:
        with tf.variable_scope("v1net-conv2"):
          v1_timesteps, v1_kernel_size, n_filters = (FLAGS.v1_timesteps, 3,
                                                     128)
          net = build_v1net(inputs=net,
                            filters=n_filters,
                            timesteps=v1_timesteps,
                            kernel_size=v1_kernel_size,
                            is_training=is_training)
      side_outputs.append(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool2')

      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      with tf.variable_scope("cam-conv3"):
        cam_net = slim.repeat(cam_net, 1, slim.conv2d, 256, [3, 3],
                              scope="cam-conv3")
      net = net + cam_net
      if add_v1net and FLAGS.v1_timesteps:
        with tf.variable_scope("v1net-conv3"):
          v1_timesteps, v1_kernel_size, n_filters = (FLAGS.v1_timesteps, 3,
                                                     256)
          net = build_v1net(inputs=net,
                            filters=n_filters,
                            timesteps=v1_timesteps,
                            kernel_size=v1_kernel_size,
                            is_training=is_training)
      side_outputs.append(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool3')

      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      with tf.variable_scope("cam-conv4"):
        cam_net = slim.repeat(cam_net, 1, slim.conv2d, 512, [3, 3],
                              scope="cam-conv4")
      net = net + cam_net
      if add_v1net and FLAGS.v1_timesteps:
        with tf.variable_scope("v1net-conv4"):
          v1_timesteps, v1_kernel_size, n_filters = (FLAGS.v1_timesteps, 3,
                                                     512)
          net = build_v1net(inputs=net,
                            filters=n_filters,
                            timesteps=v1_timesteps,
                            kernel_size=v1_kernel_size,
                            is_training=is_training)
      side_outputs.append(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool4')

      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      with tf.variable_scope("cam-conv5"):
        cam_net = slim.repeat(cam_net, 1, slim.conv2d, 512, [3, 3],
                              scope="cam-conv5")
      net = net + cam_net
      if add_v1net and FLAGS.v1_timesteps:
        with tf.variable_scope("v1net-conv5"):
          v1_timesteps, v1_kernel_size, n_filters = (FLAGS.v1_timesteps, 3,
                                                     512)
          net = build_v1net(inputs=net,
                            filters=n_filters,
                            timesteps=v1_timesteps,
                            kernel_size=v1_kernel_size,
                            is_training=is_training)
      side_outputs.append(net)

      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      # The first side output is already at input resolution; resize the
      # remaining side outputs back to input resolution.
      side_outputs_fullres = [side_outputs[0]] + [
          tf.image.resize_bilinear(side_output, [h, w])
          for side_output in side_outputs[1:]
      ]
      with tf.variable_scope("side_output_classifiers", reuse=reuse):
        side_outputs_fullres = [
            slim.conv2d(side_output, 1, [1, 1],
                        activation_fn=None, normalizer_fn=None)
            for side_output in side_outputs_fullres
        ]
      side_outputs_fullres = tf.stack(side_outputs_fullres, axis=0)
      if reduce_conv:
        with tf.variable_scope("side_output_fusion"):
          side_outputs_ = tf.transpose(side_outputs_fullres,
                                       (1, 2, 3, 4, 0))
          side_outputs_ = tf.squeeze(side_outputs_, axis=3)
          fused_predictions = fuse_predictions(side_outputs_)
      else:
        fused_predictions = tf.reduce_mean(side_outputs_fullres, axis=0)
      end_points['fused_predictions'] = fused_predictions
      side_outputs_fullres = tf.reshape(side_outputs_fullres, (-1, h, w, 1))
      end_points['side_outputs_fullres'] = side_outputs_fullres
      return fused_predictions, end_points
def vgg_16_hed(inputs,
               cams=None,
               num_classes=1,
               is_training=True,
               add_v1net_early=False,
               add_v1net=False,
               reuse=None,
               reduce_conv=True,
               scope='vgg_16'):
  """VGG-16 implementation of HED.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    cams: unused in this variant; kept for interface compatibility.
    num_classes: number of output classes (side outputs are single-channel).
    is_training: whether or not the model is being trained.
    add_v1net_early: whether to add a v1net block after the first conv
      block.
    add_v1net: whether to add v1net blocks after later convolutions.
    reuse: whether or not the network and its variables should be reused.
      To be able to reuse 'scope' must be given.
    reduce_conv: unused in this variant; side outputs are always fused with
      a learned 1x1 convolution.
    scope: Optional scope for the variables.

  Returns:
    fused_predictions: fused side-output logits at input resolution.
    end_points: a dict of tensors with intermediate activations.
  """
  del cams  # unused here
  side_outputs = []
  _, h, w, _ = inputs.shape.as_list()
  with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = add_v1net_layer(net, is_training, add_v1net_early, 1)
      net = tf.layers.batch_normalization(net, training=is_training)
      with tf.variable_scope("dsn_convolution_1"):
        dsn_1 = slim.conv2d(net, 1, [1, 1],
                            activation_fn=None, normalizer_fn=None)
      side_outputs.append(dsn_1)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')

      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = add_v1net_layer(net, is_training, add_v1net, 2)
      net = tf.layers.batch_normalization(net, training=is_training)
      with tf.variable_scope("dsn_convolution_2"):
        # TODO(vveeraba): Replace following with deconvolution.
        dsn_2 = resize_and_crop(
            slim.conv2d(net, 1, [1, 1],
                        activation_fn=None, normalizer_fn=None),
            2, h, w)
      side_outputs.append(dsn_2)
      net = slim.max_pool2d(net, [2, 2], scope='pool2')

      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = add_v1net_layer(net, is_training, add_v1net, 3)
      net = tf.layers.batch_normalization(net, training=is_training)
      with tf.variable_scope("dsn_convolution_3"):
        dsn_3 = resize_and_crop(
            slim.conv2d(net, 1, [1, 1],
                        activation_fn=None, normalizer_fn=None),
            4, h, w)
      side_outputs.append(dsn_3)
      net = slim.max_pool2d(net, [2, 2], scope='pool3')

      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = add_v1net_layer(net, is_training, add_v1net, 4)
      net = tf.layers.batch_normalization(net, training=is_training)
      with tf.variable_scope("dsn_convolution_4"):
        dsn_4 = resize_and_crop(
            slim.conv2d(net, 1, [1, 1],
                        activation_fn=None, normalizer_fn=None),
            8, h, w)
      side_outputs.append(dsn_4)
      net = slim.max_pool2d(net, [2, 2], scope='pool4')

      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      net = add_v1net_layer(net, is_training, add_v1net, 5)
      net = tf.layers.batch_normalization(net, training=is_training)
      with tf.variable_scope("dsn_convolution_5"):
        dsn_5 = resize_and_crop(
            slim.conv2d(net, 1, [1, 1],
                        activation_fn=None, normalizer_fn=None),
            16, h, w)
      side_outputs.append(dsn_5)

      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      side_outputs = tf.stack(side_outputs, axis=0)
      with tf.variable_scope("side_output_fusion"):
        side_outputs_ = tf.squeeze(
            tf.transpose(side_outputs, (1, 2, 3, 4, 0)), axis=3)
        fused_predictions = slim.conv2d(
            side_outputs_, 1, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            weights_initializer=tf.constant_initializer(0.2))
      end_points['fused_predictions'] = fused_predictions
      side_outputs_fullres = tf.reshape(side_outputs, (-1, h, w, 1))
      end_points['side_outputs_fullres'] = side_outputs_fullres
      return fused_predictions, end_points
def vgg_16_fcn8s(inputs,
                 num_classes=19,
                 is_training=True,
                 dropout_keep_prob=0.5,
                 scope='vgg_16_fcn8s'):
  """Oxford Net VGG 16-Layers version D with an FCN-8s prediction head.

  Note: All the fully_connected layers have been transformed to conv2d
  layers.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  net = inputs
  with ExitStack() as cm:
    cm.enter_context(slim.arg_scope(vgg_arg_scope()))
    sc = cm.enter_context(tf.variable_scope(scope, 'vgg_16', [inputs]))
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    cm.enter_context(
        slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                       outputs_collections=end_points_collection))

    # Pad by 100 pixels as in the original FCN, so that the VALID fc6
    # convolution covers the whole image.
    net = tf.pad(net, [[0, 0], [100, 100], [100, 100], [0, 0]])
    net = slim.conv2d(net, 64, 3, padding='VALID', scope='conv1/conv1_1')
    net = slim.conv2d(net, 64, 3, scope='conv1/conv1_2')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
    net = pool3 = slim.max_pool2d(net, [2, 2], scope='pool3')
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
    net = pool4 = slim.max_pool2d(net, [2, 2], scope='pool4')
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
    net = slim.max_pool2d(net, [2, 2], scope='pool5')

    # Use conv2d instead of fully_connected layers.
    net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                       scope='dropout6')
    net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                       scope='dropout7')
    net = slim.conv2d(net, num_classes, [1, 1],
                      activation_fn=None,
                      normalizer_fn=None,
                      weights_initializer=tf.zeros_initializer(),
                      scope='fc8')

    # FCN-8s skip connections: upsample, score the scaled pool4/pool3
    # features, crop to alignment and fuse.
    upscore2a = upscale(net, 2, name='upscore2a')
    tf.add_to_collection(end_points_collection, upscore2a)
    score_pool4 = slim.conv2d(pool4 * 0.01, num_classes, 1,
                              activation_fn=None,
                              weights_initializer=tf.zeros_initializer(),
                              scope='score_pool4')
    score_pool4c = crop(score_pool4, upscore2a, 5, name='score_pool4c')
    tf.add_to_collection(end_points_collection, score_pool4c)
    fuse_pool4 = tf.add(upscore2a, score_pool4c, name='fuse_pool4')
    tf.add_to_collection(end_points_collection, fuse_pool4)
    upscore_pool4a = upscale(fuse_pool4, 2, name='upscore_pool4a')
    tf.add_to_collection(end_points_collection, upscore_pool4a)
    score_pool3 = slim.conv2d(pool3 * 0.0001, num_classes, 1,
                              activation_fn=None,
                              weights_initializer=tf.zeros_initializer(),
                              scope='score_pool3')
    score_pool3c = crop(score_pool3, upscore_pool4a, 9, name='score_pool3c')
    tf.add_to_collection(end_points_collection, score_pool3c)
    fuse_pool3 = tf.add(upscore_pool4a, score_pool3c, name='fuse_pool3')
    tf.add_to_collection(end_points_collection, fuse_pool3)
    upscore8a = upscale(fuse_pool3, 8, name='upscore8a')
    tf.add_to_collection(end_points_collection, upscore8a)
    net = score = crop(upscore8a, inputs, 31, name='score')
    tf.add_to_collection(end_points_collection, score)

    # Convert end_points_collection into an end_point dict.
    end_points = slim.utils.convert_collection_to_dict(
        end_points_collection)
    return net, end_points
def encode_effect(states, contexts, use_relation, use_point_cloud,
                  dim_fc_state, dim_fc_context):
  """Encode the effect feature.

  Args:
    states: The state as a dict.
    contexts: The context data. Set to None if no contexts are used.
    use_relation: True if use relation encoding.
    use_point_cloud: True if point cloud data is used.
    dim_fc_state: Dimension of state encoding.
    dim_fc_context: Dimension of context encoding.

  Returns:
    A tensor of shape [batch_size, dim_fc_state].
  """
  positions = states['position']
  body_masks = states['body_mask']
  num_bodies = int(body_masks.shape[-1])

  with slim.arg_scope([slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=NORMALIZER_FN,
                      normalizer_params=NORMALIZER_PARAMS):
    features = []

    with tf.compat.v1.variable_scope('encode_position'):
      position_feats = slim.fully_connected(positions, dim_fc_state,
                                            scope='fc')
      features.append(position_feats)

    if use_relation:
      with tf.compat.v1.variable_scope('encode_relation'):
        relation_feats = encode_relation(positions, body_masks,
                                         dim_fc_state=dim_fc_state)
        features.append(relation_feats)

    if use_point_cloud:
      cloud_feats = states['cloud_feat']
      features.append(cloud_feats)

    if contexts is not None:
      with tf.compat.v1.variable_scope('encode_context'):
        context_feats = slim.fully_connected(contexts, dim_fc_context,
                                             scope='fc')
        context_feats = tf.tile(tf.expand_dims(context_feats, 1),
                                [1, num_bodies, 1])
        features.append(context_feats)

    net = tf.concat(features, axis=-1)
    net = slim.repeat(net, 2, slim.fully_connected, dim_fc_state, scope='fc')
    effects = tf.identity(net, 'effects')
  return effects
def vgg_19(inputs,
           y,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           reuse=None,
           scope='vgg_19',
           fc_conv_padding='VALID',
           global_pool=False):
  """Oxford Net VGG 19-Layers version E, extended with extra fc layers
  (fc8-fc10) and an in-graph cross-entropy loss.

  Note: All the fully_connected layers have been transformed to conv2d
  layers. To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    y: a tensor of one-hot labels of size [batch_size, num_classes].
    num_classes: number of predicted classes. If 0 or None, the logits
      layer and the loss are omitted.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    spatial_squeeze: unused; the logits are always squeezed.
    reuse: unused; the current variable scope is forced to AUTO_REUSE.
    scope: unused; the current variable scope is used instead.
    fc_conv_padding: the type of padding to use for the fully connected
      layer that is implemented as a convolutional layer.
    global_pool: unused in this variant.

  Returns:
    loss: the mean softmax cross-entropy loss.
    outputs: a list of intermediate activations (ending with the loss).
    scopes: the list of scope names aligned with `outputs`.
  """
  scopes = []
  outputs = []
  # Force AUTO_REUSE on the current scope so repeated calls share weights.
  tf.get_variable_scope()._reuse = tf.AUTO_REUSE
  scope_name = tf.get_variable_scope().name
  end_points_collection = scope_name + '_end_points'
  # Collect outputs for conv2d, fully_connected and max_pool2d.
  with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                      outputs_collections=end_points_collection):
    net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
    scopes.append('conv1')
    outputs.append(net)
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    scopes.append('pool1')
    outputs.append(net)
    net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
    scopes.append('conv2')
    outputs.append(net)
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    scopes.append('pool2')
    outputs.append(net)
    net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
    scopes.append('conv3')
    outputs.append(net)
    net = slim.max_pool2d(net, [2, 2], scope='pool3')
    scopes.append('pool3')
    outputs.append(net)
    net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
    scopes.append('conv4')
    outputs.append(net)
    net = slim.max_pool2d(net, [2, 2], scope='pool4')
    scopes.append('pool4')
    outputs.append(net)
    net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
    scopes.append('conv5')
    outputs.append(net)
    net = slim.max_pool2d(net, [2, 2], scope='pool5')
    scopes.append('pool5')
    outputs.append(net)

    # Use conv2d instead of fully_connected layers.
    net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding,
                      scope='fc6')
    scopes.append('fc6')
    outputs.append(net)
    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                       scope='dropout6')
    scopes.append('dropout6')
    outputs.append(net)
    net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
    scopes.append('fc7')
    outputs.append(net)
    net = slim.conv2d(net, 4096, [1, 1], scope='fc8')
    scopes.append('fc8')
    outputs.append(net)
    net = slim.conv2d(net, 4096, [1, 1], scope='fc9')
    scopes.append('fc9')
    outputs.append(net)
    net = slim.conv2d(net, 4096, [1, 1], scope='fc10')
    scopes.append('fc10')
    outputs.append(net)

    if num_classes:
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout10')
      scopes.append('dropout10')
      outputs.append(net)
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc11')
      with tf.variable_scope("fc11"):
        net = tf.squeeze(net, [1, 2], name="squeezed")
        _, indices = tf.math.top_k(net, 5)

        def fn(args):
          y_, index = args
          return tf.gather(y_, index)

        # Label mass falling inside the top-5 predictions (a top-5 accuracy
        # proxy); kept in the graph as the op 'top_accuracy'.
        acc_array = tf.vectorized_map(fn, (y, indices))
        top_accuracy = tf.reduce_sum(acc_array, name="top_accuracy")
      loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=net)
      loss = tf.reduce_mean(loss)
      scopes.append('fc11')
      outputs.append(loss)
  return loss, outputs, scopes
def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           fc_conv_padding='VALID',
           global_pool=False):
  """VGG-19 model.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of
      the outputs.
    scope: Optional scope for the variables.
    fc_conv_padding: padding type for the fully connected layers implemented
      as convolutions, 'SAME' or 'VALID'.
    global_pool: a boolean flag. If True, the input to the classification
      module is average-pooled over its spatial dimensions.

  Returns:
    net: the VGG network output.
    end_points: a dict of tensors with intermediate activations.
  """
  with tf.compat.v1.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')

      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding,
                        scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(
          end_points_collection)
      if global_pool:
        net = tf.reduce_mean(net, [1, 2], keepdims=True, name='global_pool')
        end_points['global_pool'] = net
      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
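# Illustrative usage sketch (not part of the original source): inference
# forward pass; with 224x224 inputs, the squeezed fc8 logits have shape
# [batch, num_classes].
def _vgg_19_inference_example(images):
  logits, end_points = vgg_19(images, num_classes=1000, is_training=False)
  probabilities = tf.nn.softmax(logits)
  return probabilities, end_points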