Example #1
def cam_inception(inputs,
                  num_classes=number_of_classes,
                  is_training=True,
                  reuse=None,
                  delta=0.6):

    with tf.variable_scope('InceptionV4', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4.inception_v4_base(inputs,
                                                             scope=scope)

    inception_c_feature = net
    with tf.variable_scope('cam_classifier/A'):
        net = slim.conv2d(
            inception_c_feature,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            padding='SAME',
            scope='conv1_3x3')
        net = slim.conv2d(
            net,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            padding='SAME',
            scope='conv2_3x3')
        net = slim.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            scope='conv3_1x1')
        end_points['features_A'] = net
        # GAP
        kernel_size = net.get_shape()[1:3]
        if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net,
                                  kernel_size,
                                  padding='VALID',
                                  scope='AvgPool_1a')
        else:
            net = tf.reduce_mean(net, [1, 2],
                                 keep_dims=True,
                                 name='global_pool')

        logits = slim.flatten(net, scope='Flatten')
        end_points['Logits'] = logits
        end_points['Predictions_A'] = tf.argmax(logits,
                                                1,
                                                name='Predictions_A')

    return logits, end_points
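A minimal usage sketch for cam_inception (assumptions: TF 1.x with tf.contrib.slim, the slim inception_v4 module on the import path, and number_of_classes defined; the batch size and 299x299 input size are placeholders):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import inception_v4  # assumed import path

number_of_classes = 10  # hypothetical

images = tf.placeholder(tf.float32, [8, 299, 299, 3])
logits, end_points = cam_inception(images, is_training=False)
# logits: [8, number_of_classes]; end_points['features_A'] holds the
# per-class activation maps used to build the CAM.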
Example #2
    def testBuildOnlyUpToFinalEndpoint(self):
        batch_size = 5
        height, width = 299, 299
        all_endpoints = [
            'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
            'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
            'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
            'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
            'Mixed_7b', 'Mixed_7c', 'Mixed_7d'
        ]
        for index, endpoint in enumerate(all_endpoints):
            with tf.Graph().as_default():
                inputs = tf.random_uniform((batch_size, height, width, 3))
                out_tensor, end_points = inception_v4.inception_v4_base(
                    inputs, final_endpoint=endpoint)
                self.assertTrue(
                    out_tensor.op.name.startswith('InceptionV4/' + endpoint))
                self.assertItemsEqual(all_endpoints[:index + 1], end_points)
Example #3
    def testBuildBaseNetwork(self):
        batch_size = 5
        height, width = 299, 299
        inputs = tf.random_uniform((batch_size, height, width, 3))
        net, end_points = inception_v4.inception_v4_base(inputs)
        self.assertTrue(net.op.name.startswith('InceptionV4/Mixed_7d'))
        self.assertListEqual(net.get_shape().as_list(),
                             [batch_size, 8, 8, 1536])
        expected_endpoints = [
            'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
            'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
            'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
            'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
            'Mixed_7b', 'Mixed_7c', 'Mixed_7d'
        ]
        self.assertItemsEqual(end_points.keys(), expected_endpoints)
        for name, op in end_points.items():  # items() for Python 3 compatibility
            self.assertTrue(op.name.startswith('InceptionV4/' + name))
Example #4
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i].
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        # Endpoints named in 'from_layer' are taken from the base network;
        # empty strings request extra feature maps built on top of the last
        # one, with the corresponding 'layer_depth' (-1 keeps the endpoint's
        # native depth).
        feature_map_layout = {
            'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
            'layer_depth': [-1, -1, -1, 512, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV4',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v4.inception_v4_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_7d',
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Example #5
def inception_v4_mod(images,
                     trainable=True,
                     is_training=True,
                     weight_decay=0.00004,
                     stddev=0.1,
                     dropout_keep_prob=0.8,
                     use_batch_norm=True,
                     batch_norm_params=None,
                     add_summaries=True,
                     scope="InceptionV4"):
  """Builds an Inception V3 subgraph for image embeddings.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the inception submodel should be trainable or not.
    is_training: Boolean indicating training mode or not.
    weight_decay: Coefficient for weight regularization.
    stddev: The standard deviation of the truncated normal weight initializer.
    dropout_keep_prob: Dropout keep probability.
    use_batch_norm: Whether to use batch normalization.
    batch_norm_params: Parameters for batch normalization. See
      tf.contrib.layers.batch_norm for details.
    add_summaries: Whether to add activation summaries.
    scope: Optional Variable scope.

  Returns:
    net: A float32 Tensor containing the flattened image embeddings.
  """
  # Only consider the inception model to be in training mode if it's trainable.
  is_inception_model_training = trainable and is_training

  if use_batch_norm:
    # Default parameters for batch normalization.
    if not batch_norm_params:
      batch_norm_params = {
          "is_training": is_inception_model_training,
          "trainable": trainable,
          # Decay for the moving averages.
          "decay": 0.9997,
          # Epsilon to prevent 0s in variance.
          "epsilon": 0.001,
          # Collection containing the moving mean and moving variance.
          "variables_collections": {
              "beta": None,
              "gamma": None,
              "moving_mean": ["moving_vars"],
              "moving_variance": ["moving_vars"],
          }
      }
  else:
    batch_norm_params = None

  if trainable:
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  with tf.variable_scope(scope, "InceptionV4", [images]) as scope:
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable):
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          normalizer_params=batch_norm_params):
        net, end_points = inception_v4_base(images, scope=scope)
        with tf.variable_scope("logits"):
          shape = net.get_shape()
          net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool")
          net = slim.dropout(
              net,
              keep_prob=dropout_keep_prob,
              is_training=is_inception_model_training,
              scope="dropout")
          net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for v in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(v)

  return net
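A minimal usage sketch (same TF 1.x / slim assumptions as above; 299x299 inputs give an 8x8 final feature map, so the average pool covers the whole map):

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
embeddings = inception_v4_mod(images, trainable=False, is_training=False)
# embeddings: [batch, 1536] global-average-pooled Inception-V4 features.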
Example #6
def fcn_inception(inputs, num_classes, is_training=None, scope=None):
    net, end_points = inception_v4.inception_v4_base(inputs)

    layer1_out = end_points['Conv2d_1a_3x3']
    layer2_out = end_points['Conv2d_2b_3x3']
    layer3_out = end_points['Mixed_3a']
    layer4_out = end_points['Mixed_4a']
    layer5_out = end_points['Mixed_5e']
    layer6_out = end_points['Mixed_6h']
    layer7_out = end_points['Mixed_7d']

    with tf.variable_scope(scope, 'fcn_inception'):
        deconv7 = slim.conv2d_transpose(layer7_out,
                                        1024,
                                        4,
                                        2,
                                        'VALID',
                                        scope='deconv7')
        end_points["fcn_inception/deconv7"] = deconv7

        add6 = tf.add(deconv7, layer6_out, name='add6')
        deconv6 = slim.conv2d_transpose(add6,
                                        384,
                                        3,
                                        2,
                                        'VALID',
                                        scope='deconv6')
        end_points["deconv6"] = deconv6

        add5 = tf.add(deconv6, layer5_out, name='add5')
        deconv5 = slim.conv2d_transpose(add5,
                                        192,
                                        4,
                                        2,
                                        'VALID',
                                        scope='deconv5')
        end_points["deconv5"] = deconv5

        add4 = tf.add(deconv5, layer4_out, name='add4')
        deconv4 = slim.conv2d_transpose(add4,
                                        160,
                                        3,
                                        1,
                                        'VALID',
                                        scope='deconv4')
        end_points["deconv4"] = deconv4

        add3 = tf.add(deconv4, layer3_out, name='add3')
        deconv3 = slim.conv2d_transpose(add3,
                                        64,
                                        3,
                                        2,
                                        'VALID',
                                        scope='deconv3')
        end_points["deconv3"] = deconv3

        add2 = tf.add(deconv3, layer2_out, name='add2')
        deconv2 = slim.conv2d_transpose(add2,
                                        32,
                                        3,
                                        1,
                                        'VALID',
                                        scope='deconv2')
        end_points["deconv2"] = deconv2

        add1 = tf.add(deconv2, layer1_out, name='add1')
        deconv1 = slim.conv2d_transpose(add1,
                                        num_classes,
                                        4,
                                        2,
                                        'VALID',
                                        scope='deconv1')
        end_points["deconv1"] = deconv1

        logits = tf.reshape(deconv1, [-1, num_classes], name='logits')
    return logits, end_points
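The flattened logits pair naturally with a per-pixel sparse cross-entropy loss. A minimal sketch (`labels` is a hypothetical [batch, H_out, W_out] int32 tensor of per-pixel class ids whose spatial size matches deconv1):

labels_flat = tf.reshape(labels, [-1])
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels_flat,
                                                   logits=logits))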
Example #7
def cam_inception(inputs,
                  num_classes=number_of_classes,
                  is_training=True,
                  reuse=None,
                  delta=0.8):

    with tf.variable_scope('InceptionV4', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4.inception_v4_base(inputs,
                                                             scope=scope)

    inception_c_feature = net
    with tf.variable_scope('cam_classifier/A'):
        net = slim.conv2d(
            inception_c_feature,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME',
            scope='conv1_3x3')
        net = slim.conv2d(
            net,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME',
            scope='conv2_3x3')
        net = slim.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            scope='conv3_1x1')
        end_points['features_A'] = net
        # GAP
        kernel_size = net.get_shape()[1:3]
        if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net,
                                  kernel_size,
                                  padding='VALID',
                                  scope='AvgPool_1a')
        else:
            net = tf.reduce_mean(net, [1, 2],
                                 keep_dims=True,
                                 name='global_pool')

        logits = slim.flatten(net, scope='Flatten')
        end_points['Logits'] = logits
        end_points['Predictions_A'] = tf.argmax(logits,
                                                1,
                                                name='Predictions_A')

    with tf.variable_scope('cam_classifier/B'):
        batch_size = inception_c_feature.get_shape()[0]
        channels = inception_c_feature.get_shape()[3]
        # For each sample, take the class activation map (CAM) of the
        # predicted class, min-max normalize it to [0, 1], and tile it
        # across all feature channels.
        for n in range(batch_size):
            ca_map = end_points['features_A'][n, :, :,
                                              end_points['Predictions_A'][n]]
            ca_map = (ca_map - tf.reduce_min(ca_map)) / (
                tf.reduce_max(ca_map) - tf.reduce_min(ca_map))
            ca_map = tf.expand_dims(ca_map, 2)
            for i in range(channels):
                if i == 0:
                    erase_tmp = ca_map
                else:
                    erase_tmp = tf.concat([erase_tmp, ca_map], 2)
            erase_tmp = tf.expand_dims(erase_tmp, 0)
            if n == 0:
                erase = erase_tmp
            else:
                erase = tf.concat([erase, erase_tmp], 0)

        # Zero out the regions classifier A attends to most strongly, forcing
        # classifier B to find complementary evidence.
        erased_feature = tf.where(tf.less(erase, delta), inception_c_feature,
                                  tf.zeros_like(inception_c_feature))
        aux_logits = slim.conv2d(
            erased_feature,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME',
            scope='conv1_3x3')
        aux_logits = slim.conv2d(
            aux_logits,
            1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME',
            scope='conv2_3x3')
        aux_logits = slim.conv2d(
            aux_logits,
            num_classes, [1, 1],
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            scope='conv3_1x1')
        end_points['features_B'] = aux_logits
        # GAP
        if kernel_size.is_fully_defined():
            aux_logits = slim.avg_pool2d(aux_logits,
                                         kernel_size,
                                         padding='VALID',
                                         scope='AvgPool_1a')
        else:
            aux_logits = tf.reduce_mean(aux_logits, [1, 2],
                                        keep_dims=True,
                                        name='global_pool')

        aux_logits = slim.flatten(aux_logits, scope='Flatten')
        end_points['AuxLogits'] = aux_logits
        end_points['Predictions_B'] = tf.argmax(aux_logits,
                                                1,
                                                name='Predictions_B')

    return logits, end_points
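The per-sample Python loop in cam_classifier/B needs a static batch size and builds many small ops; a vectorized sketch of the same erase-map construction (assuming the tensors defined above and the TF 1.x keep_dims API used throughout this example):

one_hot = tf.one_hot(end_points['Predictions_A'], num_classes)  # [N, C]
cam = tf.reduce_sum(end_points['features_A'] * one_hot[:, None, None, :],
                    axis=3, keep_dims=True)                     # [N, H, W, 1]
cam_min = tf.reduce_min(cam, axis=[1, 2], keep_dims=True)
cam_max = tf.reduce_max(cam, axis=[1, 2], keep_dims=True)
erase = (cam - cam_min) / (cam_max - cam_min)
erase = tf.tile(erase, [1, 1, 1, inception_c_feature.get_shape()[3].value])
erased_feature = tf.where(tf.less(erase, delta), inception_c_feature,
                          tf.zeros_like(inception_c_feature))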
Example #8
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV4"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization.
      batch_norm_params: Parameters for batch normalization. See
        tf.contrib.layers.batch_norm for details.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      net: A float32 Tensor containing the flattened image embeddings.
    """
    # Only consider the inception model to be in training mode if it's
    # trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        batch_norm_params = None

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.variable_scope(scope, "InceptionV4", [images]) as scope:
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v4.inception_v4_base(
                    images, scope=scope)
                with tf.variable_scope("logits"):
                    shape = net.get_shape()
                    net = slim.avg_pool2d(
                        net, shape[1:3], padding="VALID", scope="pool")
                    net = slim.dropout(
                        net,
                        keep_prob=dropout_keep_prob,
                        is_training=is_inception_model_training,
                        scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(v)

    return net
Example #9
    def build_model_graph(self, inputs, mode):
        if mode != self.MODE.TEST:
            image_batch, target_batch, large_target_batch = inputs
        else:
            image_batch, tid_batch = inputs

        with tf.name_scope('model') as scope:
            pad = (self.config['receptive_field_size'] -
                   self.config['contextual_pad'])
            image_batch = tf.pad(image_batch,
                                 [[0, 0], [pad, pad], [pad, pad], [0, 0]])

            def define_loss(preds, target):
                loss = None
                if self.config['loss_function'] == 'l17':
                    loss = tf.reduce_mean(tf.abs(target - preds)**1.7)
                elif self.config['loss_function'] == 'l2':
                    loss = tf.reduce_mean((target - preds)**2)
                else:
                    raise ValueError('Loss function %s is not supported.' %
                                     self.config['loss_function'])

                tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
                return loss

            batch_norm_params = {
                'decay': self.config['batch_norm_decay'],
                'epsilon': self.config['batch_norm_epsilon'],
                'is_training': (mode == self.MODE.TRAIN),
            }
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=(mode == self.MODE.TRAIN)):

                    # Instantiate Inception-v4 up to and including Mixed_5a.
                    net, end_points = inception_v4.inception_v4_base(
                        image_batch, final_endpoint='Mixed_5a')

                    # 3 x Inception-A blocks, corresponding to Mixed_5b, Mixed_5c, Mixed_5d.
                    blocks = {}
                    for idx in range(3):
                        block_scope = 'Mixed_5' + chr(ord('b') + idx)
                        net = inception_v4.block_inception_a(net, block_scope)

                    net = slim.conv2d(net,
                                      96, [1, 1],
                                      scope='Conv2d1_1x1',
                                      padding='SAME')
                    net = slim.conv2d(net,
                                      96, [3, 3],
                                      scope='Conv2d2_3x3',
                                      padding='SAME')

            preds = slim.conv2d(net,
                                self.config['cls_nb'], [3, 3],
                                scope='Conv2d3_3x3_preds',
                                padding='SAME',
                                activation_fn=None,
                                normalizer_fn=None)

            if mode == self.MODE.TRAIN:
                define_loss(preds, target_batch)

            # Debugging
            self.debug_sequence = []
            if mode != self.MODE.TRAIN:
                self.debug_sequence.append(preds)

            def recurrence(features, hidden, preds):
                preds = tf.stop_gradient(preds)

                with tf.name_scope(scope, 'Recurrence',
                                   [features, hidden, preds]):
                    embed_pad = self.config['target_embedding_pad']
                    padded_preds = tf.pad(preds,
                                          [[0, 0], [embed_pad, embed_pad],
                                           [embed_pad, embed_pad], [0, 0]])
                    if mode == self.MODE.TRAIN:
                        padded_preds = large_target_batch + padded_preds

                    net = padded_preds
                    with slim.arg_scope([slim.conv2d],
                                        normalizer_fn=slim.batch_norm,
                                        normalizer_params=batch_norm_params):
                        with slim.arg_scope(
                                [slim.batch_norm, slim.dropout],
                                is_training=(mode == self.MODE.TRAIN)):

                            # Average pooling to blur the squares, making the
                            # ground truth look more like the predictions.
                            # net = slim.avg_pool2d(net, 16, [3, 3], 1, scope='AvgPool', padding='VALID')
                            net = slim.conv2d(net,
                                              16, [3, 3],
                                              rate=1,
                                              scope='Dilated0',
                                              padding='VALID')
                            net = slim.conv2d(net,
                                              16, [3, 3],
                                              rate=2,
                                              scope='Dilated1',
                                              padding='VALID')
                            net = slim.conv2d(net,
                                              16, [3, 3],
                                              rate=3,
                                              scope='Dilated2',
                                              padding='VALID')
                            net = slim.conv2d(net,
                                              16, [3, 3],
                                              rate=4,
                                              scope='Dilated3',
                                              padding='VALID')
                            net = slim.conv2d(net,
                                              32, [3, 3],
                                              rate=5,
                                              scope='Dilated4',
                                              padding='VALID')
                            net = slim.conv2d(net,
                                              32, [3, 3],
                                              rate=1,
                                              scope='Dilated5',
                                              padding='VALID')

                            # 32 + 384 + 384 + 6
                            net = tf.concat([net, hidden, features, preds],
                                            axis=3)

                            # 3 x Inception-A blocks, corresponding to Mixed_5b, Mixed_5c, Mixed_5d.
                            for idx in range(3):
                                block_scope = 'Mixed_5' + chr(ord('b') + idx)
                                net = inception_v4.block_inception_a(
                                    net, block_scope)

                            hidden = slim.conv2d(
                                net,
                                self.config['hidden_state_size'], [1, 1],
                                scope='Conv2d0_1x1',
                                padding='SAME')

                            net = slim.conv2d(net,
                                              96, [1, 1],
                                              scope='Conv2d1_1x1',
                                              padding='SAME')
                            net = slim.conv2d(net,
                                              96, [3, 3],
                                              scope='Conv2d2_3x3',
                                              padding='SAME')

                    preds = slim.conv2d(net,
                                        self.config['cls_nb'], [3, 3],
                                        scope='Conv2d3_3x3_preds',
                                        padding='SAME',
                                        activation_fn=None,
                                        normalizer_fn=None)

                    if mode != self.MODE.TRAIN:
                        self.debug_sequence.append(preds)

                    loss = None
                    if mode == self.MODE.TRAIN:
                        loss = define_loss(preds, target_batch)

                    return preds, loss, hidden

            # 3 x Recurrent context blocks
            rnn_template = tf.make_template('rnn_shared_variables', recurrence)

            hidden_shape = [
                dim.value for dim in end_points['Mixed_5a'].get_shape()
            ]
            hidden_shape[-1] = self.config['hidden_state_size']
            hidden = tf.zeros(hidden_shape)

            final_loss = None
            for idx in range(3):
                preds, final_loss, hidden = rnn_template(
                    end_points['Mixed_5a'], hidden, preds)

        if mode == self.MODE.TRAIN:
            with tf.name_scope('stats'):
                tf.summary.scalar('final_loss',
                                  final_loss,
                                  collections=['train_summaries'])

                pf_area = self.config['projective_field_size']**2
                target_counts = tf.reduce_sum(target_batch / pf_area,
                                              axis=[1, 2])
                pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
                mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
                tf.summary.scalar('mae', mae, collections=['train_summaries'])

            def build_target_cache(input_shape, size, offset, equalize=True):
                max_size = 0
                output_size = 0
                sizes = np.empty((size, size, 4), dtype=np.int32)
                for y in range(size):
                    input_r = utilities.Rect(y + offset, 0, y + offset, 0,
                                             input_shape[1], input_shape[2])

                    r = utilities.calc_projective_field(
                        image_batch.name, end_points['Mixed_5a'].name, input_r)

                    sizes[y, :, 0] = r.min_y
                    sizes[:, y, 1] = r.min_y
                    # Python ranges and indices exclude the upper bound.
                    sizes[y, :, 2] = r.max_y + 1
                    sizes[:, y, 3] = r.max_y + 1

                    output_size = r.h

                    if r.height > max_size:
                        max_size = r.height

                if self.config['debug']:
                    print('Projective field size: %i' % max_size)
                    print('Output size: %i' % output_size)

                if equalize:
                    # Make all projective fields the same size.
                    for y in range(size):
                        for x in range(size):
                            if sizes[y, x, 2] - sizes[y, x, 0] < max_size:
                                if sizes[y, x, 0] + max_size // 2 < output_size // 2:
                                    sizes[y, x, 2] = sizes[y, x, 0] + max_size
                                else:
                                    sizes[y, x, 0] = sizes[y, x, 2] - max_size
                            if sizes[y, x, 3] - sizes[y, x, 1] < max_size:
                                if sizes[y, x, 1] + max_size // 2 < output_size // 2:
                                    sizes[y, x, 3] = sizes[y, x, 1] + max_size
                                else:
                                    sizes[y, x, 1] = sizes[y, x, 3] - max_size

                            if (sizes[y, x, 2] - sizes[y, x, 0] != max_size
                                    or sizes[y, x, 3] - sizes[y, x, 1] != max_size):
                                print(sizes[y, x])

                return sizes, max_size

            (self.target_sizes,
             self.config['projective_field_size']) = build_target_cache(
                 [dim.value for dim in image_batch.get_shape()],
                 size=self.config['tile_size'],
                 offset=self.config['receptive_field_size'],
             )

            large_input_size = (self.config['tile_size'] +
                                2 * self.config['large_contextual_pad'])
            self.target_sizes_large, _ = build_target_cache(
                [1, large_input_size, large_input_size, 1],
                size=(self.config['tile_size'] +
                      2 * self.config['large_contextual_pad_unpadded']),
                offset=self.config['receptive_field_size'],
                # The area content of these squares does not matter.
                equalize=False,
            )

        if mode == self.MODE.VALIDATE:
            with tf.name_scope('stats'):
                # Mean absolute error
                pf_area = self.config['projective_field_size']**2
                target_counts = tf.reduce_sum(target_batch / pf_area,
                                              axis=[1, 2])
                pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
                mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
                mae_avg = utilities.RunningAverage(
                    'mae',
                    mae,
                    summary_args={'collections': ['stats_summaries']})

                # Accuracy
                acc = tf.reduce_mean(
                    tf.cast(
                        tf.abs(tf.reduce_mean(preds - target_batch, [1, 2])),
                        tf.float32))
                acc_avg = utilities.RunningAverage(
                    'accuracy',
                    acc,
                    summary_args={'collections': ['stats_summaries']})

            self.valid_op = tf.group(mae_avg.update_op, acc_avg.update_op)
            self.stats_reset_op = tf.group(mae_avg.reset_op, acc_avg.reset_op)
            self.score = mae_avg.value
            print(self.score)

            # Debugging
            self.debug_preds = preds
            self.debug_targets = target_batch
            self.debug_large_targets = large_target_batch

            embed_pad = self.config['target_embedding_pad']
            reduced_large_target_batch = tf.reduce_sum(large_target_batch,
                                                       axis=3,
                                                       keep_dims=True)
            reduced_target_batch = tf.reduce_sum(tf.pad(
                target_batch, [[0, 0], [embed_pad, embed_pad],
                               [embed_pad, embed_pad], [0, 0]]),
                                                 axis=3,
                                                 keep_dims=True)
            self.debug_combined_preds = tf.concat([
                reduced_large_target_batch, reduced_target_batch,
                tf.zeros_like(reduced_target_batch)
            ],
                                                  axis=3)

        if mode != self.MODE.TRAIN:
            # Debugging
            self.debug_inputs = image_batch

        if mode == self.MODE.TEST:
            pf_area = self.config['projective_field_size']**2
            cond = tf.greater(preds, tf.fill(tf.shape(preds), 0.05))
            preds = tf.where(cond, preds, tf.zeros(tf.shape(preds)))
            pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])

            self.test_op = (pred_counts, tid_batch)
Example #10
    def build_model_graph(self, inputs, mode):
        image_batch, target_batch, tid_batch = inputs

        # The tiles are extended by a margin of 32 px. This roughly corresponds
        # to the extent by which the receptive fields in Mixed_5d are enlarged.
        # This way, when the window slides toward the boundaries of the image,
        # the extended receptive field can recognize the boundaries early
        # enough that it can correctly decide whether the center of an animal
        # is inside or outside of the tile.

        with tf.name_scope('model') as scope:
            pad = (self.config['receptive_field_size'] -
                   self.config['contextual_pad'])
            image_batch = tf.pad(image_batch,
                                 [[0, 0], [pad, pad], [pad, pad], [0, 0]])
            batch_norm_params = {
                'decay': self.config['batch_norm_decay'],
                'epsilon': self.config['batch_norm_epsilon'],
                'is_training': (mode == self.MODE.TRAIN),
            }
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=(mode == self.MODE.TRAIN)):
                    net, end_points = inception_v4.inception_v4_base(
                        image_batch, final_endpoint='Mixed_5d')

            preds = slim.conv2d(net,
                                self.config['cls_nb'], [1, 1],
                                scope='Conv2d_1x1_preds',
                                activation_fn=None)

        if mode == self.MODE.TRAIN:
            # Build target cache
            self.target_sizes = np.empty(
                (self.config['tile_size'], self.config['tile_size'], 4),
                dtype=np.int32)
            input_shape = [dim.value for dim in image_batch.get_shape()]
            max_size = 0
            output_size = 0
            for y in range(self.config['tile_size']):
                input_r = utilities.Rect(
                    y + self.config['receptive_field_size'], 0,
                    y + self.config['receptive_field_size'], 0, input_shape[1],
                    input_shape[2])
                r = utilities.calc_projective_field(
                    image_batch.name, end_points['Mixed_5a'].name, input_r)
                output_size = r.h
                if r.height > max_size:
                    max_size = r.height
                self.target_sizes[y, :, 0] = r.min_y
                self.target_sizes[:, y, 1] = r.min_y
                # Python ranges and indices exclude the upper bound.
                self.target_sizes[y, :, 2] = r.max_y + 1
                self.target_sizes[:, y, 3] = r.max_y + 1

            self.config['projective_field_size'] = max_size

            if self.config['debug']:
                print('Projective field size: %i' % max_size)
                print('Output size: %i' % output_size)

            # Make all projective fields the same size.
            for y in range(self.config['tile_size']):
                for x in range(self.config['tile_size']):
                    if self.target_sizes[y, x, 2] - self.target_sizes[y, x, 0] < max_size:
                        if self.target_sizes[y, x, 0] + max_size // 2 < output_size // 2:
                            self.target_sizes[y, x, 2] = self.target_sizes[y, x, 0] + max_size
                        else:
                            self.target_sizes[y, x, 0] = self.target_sizes[y, x, 2] - max_size
                    if self.target_sizes[y, x, 3] - self.target_sizes[y, x, 1] < max_size:
                        if self.target_sizes[y, x, 1] + max_size // 2 < output_size // 2:
                            self.target_sizes[y, x, 3] = self.target_sizes[y, x, 1] + max_size
                        else:
                            self.target_sizes[y, x, 1] = self.target_sizes[y, x, 3] - max_size

                    if (self.target_sizes[y, x, 2] - self.target_sizes[y, x, 0] != max_size
                            or self.target_sizes[y, x, 3] - self.target_sizes[y, x, 1] != max_size):
                        print(self.target_sizes[y, x])

            with tf.name_scope('training'):
                loss = tf.reduce_mean((target_batch - preds)**2)
                tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

        pf_area = self.config['projective_field_size']**2

        if mode == self.MODE.VALIDATE:
            with tf.name_scope('stats'):
                # Mean absolute error
                pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
                target_counts = tf.reduce_sum(target_batch / pf_area,
                                              axis=[1, 2])
                mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
                mae_avg = utilities.RunningAverage(
                    'mae',
                    mae,
                    summary_args={'collections': ['stats_summaries']})

                # Accuracy
                acc = tf.reduce_mean(
                    tf.cast(
                        tf.abs(tf.reduce_mean(preds - target_batch, [1, 2])),
                        tf.float32))
                acc_avg = utilities.RunningAverage(
                    'accuracy',
                    acc,
                    summary_args={'collections': ['stats_summaries']})

            with tf.control_dependencies([mae_avg.update_op,
                                          acc_avg.update_op]):
                # All we need is the control dependencies above.
                self.valid_op = tf.no_op()

            self.stats_reset_op = tf.group(mae_avg.reset_op, acc_avg.reset_op)

            self.score = mae_avg.value

            self.debug_preds = preds
            self.debug_inputs = image_batch
            self.debug_targets = target_batch

        if mode == self.MODE.TEST:
            pf_area = (self.config['projective_field_size'] /
                       self.config['stride'])**2
            pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])

            self.test_op = (pred_counts, tid_batch)
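Both of the last two examples recover object counts by dividing the summed map by pf_area; this works because each object is rendered as a filled square whose side is projective_field_size in the target map (the equalization step above makes every square the same size). A toy NumPy check (values hypothetical):

import numpy as np

pf = 9                                  # hypothetical projective field size
target = np.zeros((64, 64), np.float32)
target[10:10 + pf, 10:10 + pf] = 1.0    # one object
target[30:30 + pf, 40:40 + pf] = 1.0    # a second object
print(target.sum() / pf**2)             # -> 2.0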