예제 #1
0
    def _get_pyramid_pooling_arguments(self,
                                       crop_size,
                                       output_stride,
                                       image_grid,
                                       image_pooling_crop_size=None):
        """Computes the resize target and pooling kernel for pyramid pooling.

        Args:
          crop_size: A list of two integers, [crop_height, crop_width], giving
            the crop size of the whole patch.
          output_stride: Integer, output stride of the extracted features.
          image_grid: A list of two integers, [image_grid_height,
            image_grid_width], giving the grid over which pyramid pooling is
            performed.
          image_pooling_crop_size: Optional list of two integers, [crop_height,
            crop_width], giving the crop size used for the image pooling
            operation. It is kept separate from `crop_size` so that pooling
            can use a different crop size; when None, `crop_size` is used.

        Returns:
          A tuple (resize_value, pooled_kernel). Each element is a two-item
          list [height, width] holding the values returned by
          `utils.scale_dimension`.
        """

        def scaled(size, divisor):
            # Scale `size` by the reciprocal of `divisor`.
            return utils.scale_dimension(size, 1. / divisor)

        # Size of the feature map for the whole crop at the given stride.
        resize_value = [
            scaled(crop_size[0], output_stride),
            scaled(crop_size[1], output_stride),
        ]

        # Fall back to the whole-patch crop size when no dedicated pooling
        # crop size was supplied.
        pooling_crop = (crop_size if image_pooling_crop_size is None
                        else image_pooling_crop_size)

        # Each grid cell covers 1 / image_grid of the strided pooling crop.
        pooled_kernel = [
            scaled(pooling_crop[0], output_stride * image_grid[0]),
            scaled(pooling_crop[1], output_stride * image_grid[1]),
        ]
        return (resize_value, pooled_kernel)
예제 #2
0
  def _apply_conv_operation(self, net, operation, stride,
                            is_from_original_input):
    """Applies the predicted conv operation to net.

    The operation is selected by inspecting the `operation` string, checked
    in this order:
      * contains 'separable': a stack of ReLU -> separable conv -> batch
        norm layers. The kernel size is the number before the first 'x' and
        the number of layers is the last '_'-separated token (the scope
        names suggest a form like 'separable_3x3_2'). Only the first layer
        is strided.
      * contains 'atrous': ReLU followed by a regular conv and batch norm.
        With stride 2 the input is first bilinearly resized using
        `scale_dimension(..., 0.5)` and the conv uses rate 1; otherwise the
        conv uses rate 2.
      * equals 'none': identity, unless striding or a channel mismatch
        requires a ReLU -> 1x1 conv -> batch norm projection.
      * contains 'pool': 'avg' or 'max' pooling (type is the first
        '_'-separated token, size the number before 'x' in the last token),
        followed by a 1x1 conv and batch norm when the channel count differs
        from `self._filter_size`.

    Args:
      net: Input tensor. Its dimension at index 3 is treated as the number
        of input filters.
      operation: String naming the operation to apply (see above).
      stride: Integer stride requested for the operation.
      is_from_original_input: Boolean. A stride greater than 1 is only
        honoured when this is true; otherwise the stride is reset to 1.

    Returns:
      The tensor produced by the operation. `self._apply_drop_path` is
      applied to it for every operation except 'none'.

    Raises:
      ValueError: If the operation or pooling type is not implemented, or
        if no kernel size can be parsed from a separable/atrous operation.
    """

    def parse_kernel_size(op_name):
      # Use the whole run of digits that precedes the first 'x', so that a
      # multi-digit kernel such as '11x11' yields 11 rather than only its
      # last digit.
      prefix = op_name.split('x')[0]
      digits = prefix[len(prefix.rstrip('0123456789')):]
      if not digits:
        raise ValueError(
            'Cannot parse kernel size from operation: {}'.format(op_name))
      return int(digits)

    # Striding is only meaningful on the original cell inputs.
    if stride > 1 and not is_from_original_input:
      stride = 1
    input_filters = net.shape[3]
    filter_size = self._filter_size
    if 'separable' in operation:
      num_layers = int(operation.split('_')[-1])
      kernel_size = parse_kernel_size(operation)
      for layer_num in range(num_layers):
        net = tf.nn.relu(net)
        net = slim.separable_conv2d(
            net,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
            stride=stride)
        net = slim.batch_norm(
            net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
        # Only the first layer of the stack may be strided.
        stride = 1
    elif 'atrous' in operation:
      kernel_size = parse_kernel_size(operation)
      net = tf.nn.relu(net)
      if stride == 2:
        # Downsample by resizing instead of striding, then convolve with
        # rate 1.
        scaled_height = scale_dimension(tf.shape(net)[1], 0.5)
        scaled_width = scale_dimension(tf.shape(net)[2], 0.5)
        net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
        net = slim.conv2d(net, filter_size, kernel_size, rate=1,
                          scope='atrous_{0}x{0}'.format(kernel_size))
      else:
        net = slim.conv2d(net, filter_size, kernel_size, rate=2,
                          scope='atrous_{0}x{0}'.format(kernel_size))
      net = slim.batch_norm(net, scope='bn_atr_{0}x{0}'.format(kernel_size))
    elif operation == 'none':
      # Identity, unless the spatial size or channel count has to change.
      if stride > 1 or (input_filters != filter_size):
        net = tf.nn.relu(net)
        net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
    elif 'pool' in operation:
      pooling_type = operation.split('_')[0]
      pooling_shape = int(operation.split('_')[-1].split('x')[0])
      if pooling_type == 'avg':
        net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      elif pooling_type == 'max':
        net = slim.max_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      else:
        raise ValueError(
            'Unimplemented pooling type: {}'.format(pooling_type))
      # Pooling keeps the channel count, so project it when it differs.
      if input_filters != filter_size:
        net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
    else:
      raise ValueError('Unimplemented operation: {}'.format(operation))

    if operation != 'none':
      net = self._apply_drop_path(net)
    return net
예제 #3
0
def _build_nas_base(images,
                    cell,
                    backbone,
                    num_classes,
                    hparams,
                    global_pool=False,
                    reuse=None,
                    scope=None,
                    final_endpoint=None):
    """Constructs a NAS model.

    Args:
      images: A tensor of size [batch, height, width, channels].
      cell: Cell structure used in the network. It is called once per entry
        of `backbone`.
      backbone: Backbone structure used in the network. A list of integers
        in which value 0 means "output_stride=4", value 1 means
        "output_stride=8", value 2 means "output_stride=16", and value 3
        means "output_stride=32".
      num_classes: Number of classes to predict. When None, no logits layer
        is added.
      hparams: Hyperparameters needed to construct the network; this
        function reads `filter_scaling_rate` from it.
      global_pool: If True, we perform global average pooling before
        computing the logits. Set to True for image classification, False
        for dense prediction.
      reuse: Whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      final_endpoint: The endpoint to construct the network up to.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out].
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    with tf.variable_scope(scope, 'nas', [images], reuse=reuse):
        end_points = {}

        def reached_final(name, tensor):
            # Record the activation, then report whether construction
            # should stop at this endpoint.
            end_points[name] = tensor
            if not final_endpoint:
                return final_endpoint
            return name == final_endpoint

        net, cell_outputs = _nas_stem(images)
        if reached_final('Stem', net):
            return net, end_points

        # Stack the cells, following the resolution levels in `backbone`.
        filter_scaling = 1.0
        for cell_num, level in enumerate(backbone):
            if cell_num == 0:
                # The first cell can only move down one level.
                goes_down = level == 1
                goes_up = False
            else:
                previous_level = backbone[cell_num - 1]
                goes_down = level == previous_level + 1
                goes_up = not goes_down and level == previous_level - 1

            stride = 1
            if goes_down:
                # One level down: stride 2 and scale the filters up.
                stride = 2
                filter_scaling *= hparams.filter_scaling_rate
            elif goes_up:
                # One level up: resize by a factor of 2 and scale the
                # filters back down.
                upsampled_size = [
                    scale_dimension(net.shape[1].value, 2),
                    scale_dimension(net.shape[2].value, 2),
                ]
                net = resize_bilinear(net, upsampled_size, net.dtype)
                filter_scaling /= hparams.filter_scaling_rate

            net = cell(net,
                       scope='cell_{}'.format(cell_num),
                       filter_scaling=filter_scaling,
                       stride=stride,
                       prev_layer=cell_outputs[-2],
                       cell_num=cell_num)
            if reached_final('Cell_{}'.format(cell_num), net):
                return net, end_points
            cell_outputs.append(net)

        net = tf.nn.relu(net)

        if global_pool:
            # Average over the two spatial axes, keeping rank 4.
            net = tf.reduce_mean(net, [1, 2],
                                 name='global_pool',
                                 keepdims=True)

        if num_classes is not None:
            net = slim.conv2d(net,
                              num_classes, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='logits')
            end_points['predictions'] = slim.softmax(net, scope='predictions')

        return net, end_points