Esempio n. 1
0
    def GetAttentionPrelogit(
            self,
            images,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False,
            use_batch_norm=True):
        """Builds the ResNet-50 subnetwork and the attention subnetwork on it.

        The backbone is built through `self.GetResnet50Subnetwork`; the
        end point selected by `self._target_layer_type` is used as the
        feature map and fed to `self._GetAttentionSubnetwork`.

        Args:
          images: A tensor of size [batch, height, width, channels].
          weight_decay: Weight-decay regularizer parameter. It is applied
            only to the arg scope of the attention subnetwork.
          attention_nonlinear: Type of non-linearity on top of the attention
            function.
          attention_type: Type of the attention structure.
          kernel: Convolutional kernel to use in attention layers
            (eg, [3, 3]).
          training_resnet: Whether or not the Resnet blocks from the model
            are in training mode.
          training_attention: Whether or not the attention part of the model
            is in training mode (forwarded as `is_training` to batch norm).
          reuse: Whether or not the layer and its variables should be reused.
          use_batch_norm: Whether or not to use batch normalization.

        Returns:
          prelogits: A tensor of size [batch, 1, 1, channels].
          attention_prob: Attention score after the non-linearity.
          attention_score: Attention score before the non-linearity.
          feature_map: Features extracted from the model, which are not
            l2-normalized.
          end_points: Set of activations for external use, as returned by
            the attention subnetwork.
        """
        # Backbone features; note that no weight decay is passed here.
        backbone_scope = resnet_v1.resnet_arg_scope(
            use_batch_norm=use_batch_norm)
        with arg_scope(backbone_scope):
            _, backbone_end_points = self.GetResnet50Subnetwork(
                images, is_training=training_resnet, reuse=reuse)

        feature_map = backbone_end_points[self._target_layer_type]

        # Attention head: resnet defaults with weight decay, plus the batch
        # norm training flag dedicated to the attention part.
        attention_scope = resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)
        with arg_scope(attention_scope), arg_scope(
                [layers.batch_norm], is_training=training_attention):
            attention_outputs = self._GetAttentionSubnetwork(
                feature_map,
                backbone_end_points,
                attention_nonlinear=attention_nonlinear,
                attention_type=attention_type,
                kernel=kernel,
                reuse=reuse)

        prelogits, attention_prob, attention_score, end_points = (
            attention_outputs)
        return (prelogits, attention_prob, attention_score, feature_map,
                end_points)
Esempio n. 2
0
    def extract_features(self, inputs):
        """Run the backbone selected by ``self.cfg['net_type']`` on the inputs.

        The inputs are first passed through ``self.center_inputs``. The
        backbone family is chosen by substring match on the configured
        network type, checked in this order: 'resnet', 'mobilenet',
        'efficientnet'.

        Args:
          inputs: Input batch handed to ``self.center_inputs``.

        Returns:
          A ``(net, end_points)`` pair as produced by the selected backbone.

        Raises:
          ValueError: If the network type matches none of the families.
        """
        im_centered = self.center_inputs(inputs)
        net_type = self.cfg['net_type']

        if 'resnet' in net_type:
            backbone = net_funcs[net_type]
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = backbone(
                    im_centered,
                    global_pool=False,
                    output_stride=16,
                    is_training=False)
            return net, end_points

        if 'mobilenet' in net_type:
            backbone = net_funcs[net_type]
            with slim.arg_scope(mobilenet_v2.training_scope()):
                net, end_points = backbone(im_centered)
            return net, end_points

        if 'efficientnet' not in net_type:
            raise ValueError(f"Unknown network of type {net_type}")

        # Optional efficientnet switches default to False and are written
        # back into the config so later readers see the same values.
        for flag in ('use_batch_norm', 'use_drop_out'):
            if flag not in self.cfg.keys():
                self.cfg[flag] = False

        # Scale the centered image by the per-channel constants from `eff`.
        im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3])
        net, end_points = eff.build_model_base(
            im_centered,
            net_type,
            use_batch_norm=self.cfg['use_batch_norm'],
            drop_out=self.cfg['use_drop_out'])
        return net, end_points
Esempio n. 3
0
    def _GetAttentionModel(
            self,
            images,
            num_classes,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False):
        """Builds the attention model and a classification layer on top.

        Prelogits come from `self.GetAttentionPrelogit`; a 1x1 convolution
        named 'logits' (no activation, no normalizer) maps them to
        `num_classes` channels, and the two spatial axes are squeezed away.

        Args:
          images: A tensor of size [batch, height, width, channels]
          num_classes: The number of output classes.
          weight_decay: The parameters for weight_decay regularizer.
          attention_nonlinear: Type of non-linearity on top of the attention
            function.
          attention_type: Type of the attention structure.
          kernel: Convolutional kernel to use in attention layers
            (eg, [3, 3]).
          training_resnet: Whether or not the Resnet blocks from the model
            are in training mode.
          training_attention: Whether or not the attention part of the model
            is in training mode.
          reuse: Whether or not the layer and its variables should be reused.

        Returns:
          logits: A tensor of size [batch, num_classes].
          attention_prob: Attention score after the non-linearity.
          attention_score: Attention score before the non-linearity.
          feature_map: Features extracted from the model, which are not
            l2-normalized.
        """
        prelogit_outputs = self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse)
        # The fifth returned value (end points) is not needed here.
        (attention_feat, attention_prob, attention_score, feature_map,
         _) = prelogit_outputs

        classifier_scope = resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)
        with arg_scope(classifier_scope), arg_scope(
                [layers.batch_norm],
                is_training=training_attention), tf.compat.v1.variable_scope(
                    _ATTENTION_VARIABLE_SCOPE,
                    values=[attention_feat],
                    reuse=reuse):
            conv_logits = layers.conv2d(
                attention_feat,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='logits')
            # Drop the two spatial axes so logits are [batch, num_classes].
            logits = tf.squeeze(conv_logits, [1, 2], name='spatial_squeeze')

        return logits, attention_prob, attention_score, feature_map
Esempio n. 4
0
    def extract_features(self, inputs):
        """Subtract the configured mean pixel and run the configured network.

        The network function is looked up in ``net_funcs`` by
        ``self.cfg.net_type``. How the arg scope and the network call are
        spelled depends on the installed TensorFlow version.

        Args:
          inputs: Image batch from which ``self.cfg.mean_pixel`` (reshaped
            to [1, 1, 1, 3]) is subtracted.

        Returns:
          A ``(net, end_points)`` pair as produced by the network function.
        """
        net_fun = net_funcs[self.cfg.net_type]
        pixel_mean = tf.constant(
            self.cfg.mean_pixel,
            dtype=tf.float32,
            shape=[1, 1, 1, 3],
            name='img_mean')
        im_centered = inputs - pixel_mean

        # Version gate, see https://github.com/AlexEMG/DeepLabCut/issues/44.
        # True only for TensorFlow 1.0 - 1.3.
        version_parts = tf.__version__.split(".")
        is_pre_1_4 = (int(version_parts[0]) == 1
                      and int(version_parts[1]) < 4)

        if is_pre_1_4:
            # Older API: positional False to the arg scope, and the network
            # call takes no is_training keyword.
            scope_args = resnet_v1.resnet_arg_scope(False)
            extra_kwargs = {}
        else:
            scope_args = resnet_v1.resnet_arg_scope()
            extra_kwargs = {'is_training': False}

        with slim.arg_scope(scope_args):
            net, end_points = net_fun(
                im_centered,
                global_pool=False,
                output_stride=self.cfg.output_stride,
                **extra_kwargs)

        return net, end_points
Esempio n. 5
0
 def extract_features(self, inputs):
     """Run the configured network on the centered inputs.

     The network function is looked up in ``net_funcs`` by
     ``self.cfg["net_type"]`` and is called inside the default resnet arg
     scope, with global pooling off, an output stride of 16 and
     ``is_training=False``.

     Args:
       inputs: Input batch handed to ``self.center_inputs``.

     Returns:
       A ``(net, end_points)`` pair as produced by the network function.
     """
     backbone = net_funcs[self.cfg["net_type"]]
     centered = self.center_inputs(inputs)
     backbone_kwargs = {
         "global_pool": False,
         "output_stride": 16,
         "is_training": False,
     }
     with slim.arg_scope(resnet_v1.resnet_arg_scope()):
         net, end_points = backbone(centered, **backbone_kwargs)
     return net, end_points