def _cell_base(self, net, prev_layer):
  """Runs the beginning of the conv cell before the chosen ops are run.

  Normalizes the two cell inputs (`net` and `prev_layer`) to a common
  spatial size and channel count, then packages them as a list that the
  per-op branches can index into.

  Args:
    net: Output tensor of the previous cell; assumed NHWC — TODO confirm
      (channels are read from shape[3] and split on axis=3).
    prev_layer: Output of the cell before the previous one, or None when
      no such layer exists (e.g. at the start of the network).

  Returns:
    A list of tensors: [projected `net`, adjusted `prev_layer`].
  """
  filter_size = self._filter_size
  if prev_layer is None:
    # No earlier cell output available: reuse the current input.
    prev_layer = net
  else:
    if net.shape[2] != prev_layer.shape[2]:
      # Spatial sizes differ (a stride-2 cell ran in between); resample
      # prev_layer to match net's dynamic height/width.
      prev_layer = resize_bilinear(
          prev_layer, tf.shape(net)[1:3], prev_layer.dtype)
    if filter_size != prev_layer.shape[3]:
      # Channel counts differ; project prev_layer to filter_size with a
      # ReLU -> 1x1 conv -> batch-norm block.
      prev_layer = tf.nn.relu(prev_layer)
      prev_layer = slim.conv2d(prev_layer, filter_size, 1, scope='prev_1x1')
      prev_layer = self._batch_norm_fn(prev_layer, scope='prev_bn')
  # Project the current input to filter_size channels as well.
  net = tf.nn.relu(net)
  net = slim.conv2d(net, filter_size, 1, scope='1x1')
  net = self._batch_norm_fn(net, scope='beginning_bn')
  # num_or_size_splits=1 is a deliberate trick: it wraps `net` in a
  # one-element list so prev_layer can be appended alongside it.
  net = tf.split(axis=3, num_or_size_splits=1, value=net)
  net.append(prev_layer)
  return net
def _apply_conv_operation(self, net, operation, stride,
                          is_from_original_input):
  """Applies the predicted conv operation to net.

  Dispatches on the `operation` string: 'separable_KxK_N' stacks N
  ReLU/separable-conv/batch-norm layers, 'atrous_KxK' applies an atrous
  conv (emulating stride 2 via bilinear downsampling when requested),
  'none' is identity (with a 1x1 projection only when stride/channels
  require it), and '*pool_KxK' applies avg/max pooling.

  Args:
    net: Input tensor; assumed NHWC — TODO confirm (channels read from
      shape[3]).
    operation: Operation name string, e.g. 'separable_3x3_2',
      'atrous_5x5', 'none', 'avg_pool_3x3'.
    stride: Requested stride for the operation.
    is_from_original_input: Whether `net` is one of the cell's original
      inputs; only those may be strided (intermediate hidden states have
      already been downsampled, so the stride is forced back to 1).

  Returns:
    The transformed tensor, with drop-path applied for every operation
    except 'none'.

  Raises:
    ValueError: If the operation or pooling type is unimplemented.
  """
  if stride > 1 and not is_from_original_input:
    # Hidden states were produced at the reduced resolution already;
    # striding them again would double-downsample.
    stride = 1
  input_filters = net.shape[3]
  filter_size = self._filter_size
  if 'separable' in operation:
    # e.g. 'separable_5x5_2' -> num_layers=2, kernel_size=5.
    num_layers = int(operation.split('_')[-1])
    kernel_size = int(operation.split('x')[0][-1])
    for layer_num in range(num_layers):
      net = tf.nn.relu(net)
      net = slim.separable_conv2d(
          net,
          filter_size,
          kernel_size,
          depth_multiplier=1,
          scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
          stride=stride)
      net = slim.batch_norm(
          net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
      # Only the first stacked layer carries the stride.
      stride = 1
  elif 'atrous' in operation:
    kernel_size = int(operation.split('x')[0][-1])
    net = tf.nn.relu(net)
    if stride == 2:
      # Atrous conv has no stride; emulate stride 2 by bilinearly
      # halving the spatial dims, then convolving at rate 1.
      scaled_height = scale_dimension(tf.shape(net)[1], 0.5)
      scaled_width = scale_dimension(tf.shape(net)[2], 0.5)
      net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
      net = slim.conv2d(net, filter_size, kernel_size, rate=1,
                        scope='atrous_{0}x{0}'.format(kernel_size))
    else:
      net = slim.conv2d(net, filter_size, kernel_size, rate=2,
                        scope='atrous_{0}x{0}'.format(kernel_size))
    net = slim.batch_norm(net, scope='bn_atr_{0}x{0}'.format(kernel_size))
  elif operation in ['none']:
    # Identity, unless a stride or channel change forces a projection.
    if stride > 1 or (input_filters != filter_size):
      net = tf.nn.relu(net)
      net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
      net = slim.batch_norm(net, scope='bn_1')
  elif 'pool' in operation:
    # e.g. 'avg_pool_3x3' -> pooling_type='avg', pooling_shape=3.
    pooling_type = operation.split('_')[0]
    pooling_shape = int(operation.split('_')[-1].split('x')[0])
    if pooling_type == 'avg':
      net = slim.avg_pool2d(net, pooling_shape, stride=stride,
                            padding='SAME')
    elif pooling_type == 'max':
      net = slim.max_pool2d(net, pooling_shape, stride=stride,
                            padding='SAME')
    else:
      raise ValueError('Unimplemented pooling type: ', pooling_type)
    if input_filters != filter_size:
      # Pooling preserves channels; project to the cell's filter size.
      net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
      net = slim.batch_norm(net, scope='bn_1')
  else:
    raise ValueError('Unimplemented operation', operation)
  if operation != 'none':
    # Drop-path regularization on every real (non-identity) operation.
    net = self._apply_drop_path(net)
  return net
def _build_nas_base(images,
                    cell,
                    backbone,
                    num_classes,
                    hparams,
                    global_pool=False,
                    reuse=None,
                    scope=None,
                    final_endpoint=None):
  """Constructs a NAS model.

  Args:
    images: A tensor of size [batch, height, width, channels].
    cell: Cell structure used in the network.
    backbone: Backbone structure used in the network. A list of integers in
      which value 0 means "output_stride=4", value 1 means "output_stride=8",
      value 2 means "output_stride=16", and value 3 means "output_stride=32".
    num_classes: Number of classes to predict.
    hparams: Hyperparameters needed to construct the network.
    global_pool: If True, we perform global average pooling before computing
      the logits. Set to True for image classification, False for dense
      prediction.
    reuse: Whether or not the network and its variables should be reused. To
      be able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    final_endpoint: The endpoint to construct the network up to.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
    end_points: A dictionary from components of the network to the
      corresponding activation.
  """
  with tf.variable_scope(scope, 'nas', [images], reuse=reuse):
    end_points = {}

    # Records `net` under `endpoint_name` and signals whether construction
    # should stop here (truthy only when final_endpoint matches).
    def add_and_check_endpoint(endpoint_name, net):
      end_points[endpoint_name] = net
      return final_endpoint and (endpoint_name == final_endpoint)

    net, cell_outputs = _nas_stem(images)
    if add_and_check_endpoint('Stem', net):
      return net, end_points
    # Run the cells. filter_scaling tracks the channel multiplier: it grows
    # by filter_scaling_rate on each downsampling step and shrinks on each
    # upsampling step.
    filter_scaling = 1.0
    for cell_num in range(len(backbone)):
      stride = 1
      if cell_num == 0:
        # The stem ends at output_stride=4 (backbone value 0), so a first
        # entry of 1 means the very first cell downsamples.
        if backbone[0] == 1:
          stride = 2
          filter_scaling *= hparams.filter_scaling_rate
      else:
        if backbone[cell_num] == backbone[cell_num - 1] + 1:
          # One step deeper in output_stride: downsample via cell stride.
          stride = 2
          filter_scaling *= hparams.filter_scaling_rate
        elif backbone[cell_num] == backbone[cell_num - 1] - 1:
          # One step shallower: bilinearly upsample by 2 before the cell.
          scaled_height = scale_dimension(net.shape[1].value, 2)
          scaled_width = scale_dimension(net.shape[2].value, 2)
          net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
          filter_scaling /= hparams.filter_scaling_rate
      # cell_outputs[-2] feeds the skip connection from two layers back
      # (the stem is expected to seed at least two entries — TODO confirm
      # against _nas_stem).
      net = cell(net,
                 scope='cell_{}'.format(cell_num),
                 filter_scaling=filter_scaling,
                 stride=stride,
                 prev_layer=cell_outputs[-2],
                 cell_num=cell_num)
      if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
        return net, end_points
      cell_outputs.append(net)
    net = tf.nn.relu(net)
    if global_pool:
      # Global average pooling.
      net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)
    if num_classes is not None:
      # 1x1 conv produces raw logits (no activation/normalization).
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='logits')
      end_points['predictions'] = slim.softmax(net, scope='predictions')
    return net, end_points