Esempio n. 1
0
    def _cell_base(self, net, prev_layer):
        """Builds the inputs of the conv cell before the chosen ops are run.

        Args:
            net: Current input tensor of the cell.
            prev_layer: Output of an earlier layer, or None. When None, the
                unprojected `net` is used in its place.

        Returns:
            A list made of the result of `tf.split` on the projected `net`
            (a single piece along axis 3) followed by `prev_layer`, which may
            have been resized and/or projected.
        """
        num_filters = self._filter_size

        if prev_layer is not None:
            # Only dimension 2 is compared to detect a size mismatch; the
            # resize target is taken from dimensions 1 and 2 of `net`.
            if net.shape[2] != prev_layer.shape[2]:
                target_size = tf.shape(net)[1:3]
                prev_layer = resize_bilinear(prev_layer, target_size,
                                             prev_layer.dtype)
            # Bring dimension 3 to the cell's filter count when it differs.
            if num_filters != prev_layer.shape[3]:
                activated_prev = tf.nn.relu(prev_layer)
                projected_prev = slim.conv2d(
                    activated_prev, num_filters, 1, scope='prev_1x1')
                prev_layer = self._batch_norm_fn(projected_prev,
                                                 scope='prev_bn')
        else:
            prev_layer = net

        # relu -> 1x1 conv -> batch norm on the current input.
        projected = slim.conv2d(tf.nn.relu(net), num_filters, 1, scope='1x1')
        projected = self._batch_norm_fn(projected, scope='beginning_bn')

        cell_inputs = tf.split(value=projected, num_or_size_splits=1, axis=3)
        cell_inputs.append(prev_layer)
        return cell_inputs
Esempio n. 2
0
  def _apply_conv_operation(self, net, operation, stride,
                            is_from_original_input):
    """Applies the predicted conv operation to net.

    Args:
      net: Input tensor. Its dimension 3 is read as the input filter count.
      operation: Name of the operation. Recognized forms are names containing
        'separable' (kernel size right before the first 'x', layer count after
        the last '_', e.g. 'separable_3x3_2'), names containing 'atrous'
        (e.g. 'atrous_5x5'), the literal 'none', and names containing 'pool'
        whose first '_'-separated token is 'avg' or 'max'.
      stride: Requested stride. Replaced by 1 when it is above 1 and
        `is_from_original_input` is falsy.
      is_from_original_input: Flag controlling whether a stride above 1 is
        honored (see `stride`).

    Returns:
      The transformed tensor. Drop path (`self._apply_drop_path`) is applied
      for every operation except 'none'.

    Raises:
      ValueError: If the operation or pooling type is not implemented, or if
        no kernel size can be parsed from the operation name.
    """
    if stride > 1 and not is_from_original_input:
      stride = 1
    input_filters = net.shape[3]
    filter_size = self._filter_size

    def parse_kernel_size():
      # Use the whole run of digits preceding the first 'x' so multi-digit
      # kernels (e.g. '11x11') are not truncated to their last digit.
      prefix = operation.split('x')[0]
      return int(prefix[len(prefix.rstrip('0123456789')):])

    if 'separable' in operation:
      num_layers = int(operation.split('_')[-1])
      kernel_size = parse_kernel_size()
      for layer_num in range(num_layers):
        net = tf.nn.relu(net)
        net = separable_conv2d_same(
            net,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
            stride=stride)
        net = self._batch_norm_fn(
            net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
        # Only the first layer of the stack uses the requested stride.
        stride = 1
    elif 'atrous' in operation:
      kernel_size = parse_kernel_size()
      net = tf.nn.relu(net)
      if stride == 2:
        # Halve the spatial size by resizing, then use a rate-1 convolution
        # instead of a strided atrous convolution.
        scaled_height = scale_dimension(tf.shape(net)[1], 0.5)
        scaled_width = scale_dimension(tf.shape(net)[2], 0.5)
        net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
        rate = 1
      else:
        rate = 2
      net = resnet_utils.conv2d_same(
          net, filter_size, kernel_size, rate=rate, stride=1,
          scope='atrous_{0}x{0}'.format(kernel_size))
      net = self._batch_norm_fn(net, scope='bn_atr_{0}x{0}'.format(kernel_size))
    elif operation == 'none':
      # A 1x1 projection is only needed when the stride or the filter count
      # has to change; otherwise the input passes through untouched.
      if stride > 1 or (input_filters != filter_size):
        net = tf.nn.relu(net)
        net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
        net = self._batch_norm_fn(net, scope='bn_1')
    elif 'pool' in operation:
      pooling_type = operation.split('_')[0]
      pooling_shape = int(operation.split('_')[-1].split('x')[0])
      if pooling_type == 'avg':
        net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      elif pooling_type == 'max':
        net = slim.max_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      else:
        raise ValueError(
            'Unimplemented pooling type: {}'.format(pooling_type))
      # Pooling keeps the filter count, so project it when it differs.
      if input_filters != filter_size:
        net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
        net = self._batch_norm_fn(net, scope='bn_1')
    else:
      raise ValueError('Unimplemented operation: {}'.format(operation))

    if operation != 'none':
      net = self._apply_drop_path(net)
    return net
Esempio n. 3
0
  def _apply_conv_operation(self, net, operation, stride,
                            is_from_original_input):
    """Applies the predicted conv operation to net.

    Args:
      net: Input tensor. Its dimension 3 is read as the input filter count.
      operation: Name of the operation. Recognized forms are names containing
        'separable' (kernel size right before the first 'x', layer count after
        the last '_', e.g. 'separable_3x3_2'), names containing 'atrous'
        (e.g. 'atrous_5x5'), the literal 'none', and names containing 'pool'
        whose first '_'-separated token is 'avg' or 'max'.
      stride: Requested stride. Replaced by 1 when it is above 1 and
        `is_from_original_input` is falsy.
      is_from_original_input: Flag controlling whether a stride above 1 is
        honored (see `stride`).

    Returns:
      The transformed tensor. Drop path (`self._apply_drop_path`) is applied
      for every operation except 'none'.

    Raises:
      ValueError: If the operation or pooling type is not implemented, or if
        no kernel size can be parsed from the operation name.
    """
    if stride > 1 and not is_from_original_input:
      stride = 1
    input_filters = net.shape[3]
    filter_size = self._filter_size

    def parse_kernel_size():
      # Use the whole run of digits preceding the first 'x' so multi-digit
      # kernels (e.g. '11x11') are not truncated to their last digit.
      prefix = operation.split('x')[0]
      return int(prefix[len(prefix.rstrip('0123456789')):])

    if 'separable' in operation:
      num_layers = int(operation.split('_')[-1])
      kernel_size = parse_kernel_size()
      for layer_num in range(num_layers):
        net = tf.nn.relu(net)
        net = slim.separable_conv2d(
            net,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
            stride=stride)
        net = slim.batch_norm(
            net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
        # Only the first layer of the stack uses the requested stride.
        stride = 1
    elif 'atrous' in operation:
      kernel_size = parse_kernel_size()
      net = tf.nn.relu(net)
      if stride == 2:
        # Halve the spatial size by resizing, then use a rate-1 convolution
        # instead of a strided atrous convolution.
        scaled_height = scale_dimension(tf.shape(net)[1], 0.5)
        scaled_width = scale_dimension(tf.shape(net)[2], 0.5)
        net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
        rate = 1
      else:
        rate = 2
      net = slim.conv2d(net, filter_size, kernel_size, rate=rate,
                        scope='atrous_{0}x{0}'.format(kernel_size))
      net = slim.batch_norm(net, scope='bn_atr_{0}x{0}'.format(kernel_size))
    elif operation == 'none':
      # A 1x1 projection is only needed when the stride or the filter count
      # has to change; otherwise the input passes through untouched.
      if stride > 1 or (input_filters != filter_size):
        net = tf.nn.relu(net)
        net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
    elif 'pool' in operation:
      pooling_type = operation.split('_')[0]
      pooling_shape = int(operation.split('_')[-1].split('x')[0])
      if pooling_type == 'avg':
        net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      elif pooling_type == 'max':
        net = slim.max_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      else:
        raise ValueError(
            'Unimplemented pooling type: {}'.format(pooling_type))
      # Pooling keeps the filter count, so project it when it differs.
      if input_filters != filter_size:
        net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
        net = slim.batch_norm(net, scope='bn_1')
    else:
      raise ValueError('Unimplemented operation: {}'.format(operation))

    if operation != 'none':
      net = self._apply_drop_path(net)
    return net
Esempio n. 4
0
  def _cell_base(self, net, prev_layer):
    """Prepares the cell inputs ahead of the chosen operations.

    Args:
      net: Current input tensor of the cell.
      prev_layer: Output of an earlier layer, or None. When None, the
        unprojected `net` stands in for it.

    Returns:
      A list holding the result of `tf.split` on the projected `net` (one
      piece along axis 3) with `prev_layer` appended; `prev_layer` may have
      been resized and/or projected first.
    """
    target_filters = self._filter_size

    if prev_layer is not None:
      # A mismatch is detected on dimension 2 only; the resize target comes
      # from dimensions 1 and 2 of `net`.
      needs_resize = net.shape[2] != prev_layer.shape[2]
      if needs_resize:
        new_size = tf.shape(net)[1:3]
        prev_layer = resize_bilinear(prev_layer, new_size, prev_layer.dtype)
      # Match dimension 3 to the cell's filter count when it differs.
      if target_filters != prev_layer.shape[3]:
        prev_layer = slim.conv2d(
            tf.nn.relu(prev_layer), target_filters, 1, scope='prev_1x1')
        prev_layer = slim.batch_norm(prev_layer, scope='prev_bn')
    else:
      prev_layer = net

    # relu -> 1x1 conv -> batch norm on the current input.
    current = tf.nn.relu(net)
    current = slim.conv2d(current, target_filters, 1, scope='1x1')
    current = slim.batch_norm(current, scope='beginning_bn')

    hidden_states = tf.split(value=current, num_or_size_splits=1, axis=3)
    hidden_states.append(prev_layer)
    return hidden_states
Esempio n. 5
0
def _build_nas_base(images,
                    cell,
                    backbone,
                    num_classes,
                    hparams,
                    global_pool=False,
                    reuse=None,
                    scope=None,
                    final_endpoint=None):
  """Builds a NAS network: stem, a sequence of cells, and optional logits.

  Args:
    images: A tensor of size [batch, height, width, channels].
    cell: Callable cell structure; invoked once per backbone entry.
    backbone: List of integers describing the resolution level of each cell:
      0 means "output_stride=4", 1 means "output_stride=8", 2 means
      "output_stride=16" and 3 means "output_stride=32".
    num_classes: Number of classes to predict, or None to skip the logits
      layer.
    hparams: Hyperparameters; `filter_scaling_rate` is read when the level
      changes between consecutive cells.
    global_pool: If True, the features are averaged over dimensions 1 and 2
      before the logits. Use True for image classification and False for
      dense prediction.
    reuse: Whether the network and its variables should be reused. 'scope'
      must be given to be able to reuse.
    scope: Optional variable_scope.
    final_endpoint: Name of the endpoint at which construction stops early.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
    end_points: A dictionary mapping network components to their activations.
  """
  with tf.variable_scope(scope, 'nas', [images], reuse=reuse):
    end_points = {}

    def add_and_check_endpoint(endpoint_name, net):
      # Record the activation; truthy result means "stop building here".
      end_points[endpoint_name] = net
      return final_endpoint and (endpoint_name == final_endpoint)

    net, cell_outputs = _nas_stem(images)
    if add_and_check_endpoint('Stem', net):
      return net, end_points

    # Stack the cells, tracking how the resolution level moves between them.
    filter_scaling = 1.0
    for cell_num, level in enumerate(backbone):
      if cell_num == 0:
        # The first cell can only go one level down (level 1).
        goes_down = level == 1
        goes_up = False
      else:
        previous_level = backbone[cell_num - 1]
        goes_down = level == previous_level + 1
        goes_up = level == previous_level - 1

      stride = 1
      if goes_down:
        stride = 2
        filter_scaling *= hparams.filter_scaling_rate
      elif goes_up:
        # Upsample by a factor of 2 before running the cell.
        scaled_height = scale_dimension(net.shape[1].value, 2)
        scaled_width = scale_dimension(net.shape[2].value, 2)
        net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
        filter_scaling /= hparams.filter_scaling_rate

      net = cell(
          net,
          scope='cell_{}'.format(cell_num),
          filter_scaling=filter_scaling,
          stride=stride,
          prev_layer=cell_outputs[-2],
          cell_num=cell_num)
      endpoint_name = 'Cell_{}'.format(cell_num)
      if add_and_check_endpoint(endpoint_name, net):
        return net, end_points
      cell_outputs.append(net)

    net = tf.nn.relu(net)

    if global_pool:
      # Global average pooling.
      net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)

    if num_classes is None:
      return net, end_points

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                      normalizer_fn=None, scope='logits')
    end_points['predictions'] = slim.softmax(net, scope='predictions')
    return net, end_points