def _conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
    '''Apply BatchNorm, ReLU, 3x3 Conv2D, and an optional bottleneck block and dropout.

    Args:
        ip: input keras tensor
        nb_filter: number of filters
        bottleneck: add bottleneck block
        dropout_rate: dropout rate
        weight_decay: weight decay factor

    Returns:
        keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('conv_block'):
        x = BatchNormalization(axis=concat_axis, momentum=0.1, epsilon=1e-5)(ip)
        x = Activation('relu')(x)

        if bottleneck:
            # Factor of 4 obtained from
            # https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua
            inter_channel = nb_filter * 4

            x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same',
                       use_bias=False, kernel_regularizer=l2(weight_decay))(x)
            x = BatchNormalization(axis=concat_axis, epsilon=1e-5, momentum=0.1)(x)
            x = Activation('relu')(x)

        x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same',
                   use_bias=False)(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)

    return x
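# Editor's usage sketch (not from the original source): wiring the conv block
# above into a tiny functional-API model. Assumes the standard Keras imports
# used by the surrounding file.
def _example_conv_block_usage():
    from keras.layers import Input
    from keras.models import Model

    inputs = Input(shape=(32, 32, 3))
    # BN -> ReLU -> (1x1 bottleneck) -> BN -> ReLU -> 3x3 conv -> dropout
    x = _conv_block(inputs, nb_filter=12, bottleneck=True, dropout_rate=0.2)
    return Model(inputs, x)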
def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, lr_mult=None, **kwargs):
    super(MultiSGD, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.momentum = K.variable(momentum, name='momentum')
        self.decay = K.variable(decay, name='decay')
    self.initial_decay = decay
    self.nesterov = nesterov
    self.lr_mult = lr_mult
def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4,
                 block_prefix=None):
    '''
    Adds a convolution layer (with batch normalization and relu),
    and optionally a bottleneck layer.

    # Arguments
        ip: Input tensor
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        bottleneck: if True, adds a bottleneck convolution block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        block_prefix: str, for unique layer naming

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        output tensor of block
    '''
    with K.name_scope('ConvBlock'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                               name=name_or_none(block_prefix, '_bn'))(ip)
        x = Activation('relu')(x)

        if bottleneck:
            inter_channel = nb_filter * 4

            x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same',
                       use_bias=False, kernel_regularizer=l2(weight_decay),
                       name=name_or_none(block_prefix, '_bottleneck_conv2D'))(x)
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                                   name=name_or_none(block_prefix, '_bottleneck_bn'))(x)
            x = Activation('relu')(x)

        x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same',
                   use_bias=False, name=name_or_none(block_prefix, '_conv2D'))(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)

    return x
def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None,
                  weight_decay=1e-4, grow_nb_filters=True, return_concat_list=False,
                  block_prefix=None):
    '''
    Builds a dense block where the output of each conv_block is fed
    to all subsequent ones.

    # Arguments
        x: input keras tensor
        nb_layers: the number of conv_blocks to append to the model
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        growth_rate: growth rate of the dense block
        bottleneck: if True, adds a bottleneck convolution block to each conv_block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        grow_nb_filters: if True, allows the number of filters to grow
        return_concat_list: set to True to return the list of feature maps
            along with the actual output
        block_prefix: str, for unique block naming

    # Returns
        If return_concat_list is True, returns the output keras tensor, the number
        of filters and the list of all feature maps added within the dense block.
        If return_concat_list is False, returns the output keras tensor and the
        number of filters.
    '''
    with K.name_scope('DenseBlock'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x_list = [x]

        for i in range(nb_layers):
            cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay,
                              block_prefix=name_or_none(block_prefix, '_%i' % i))
            x_list.append(cb)

            x = concatenate([x, cb], axis=concat_axis)

            if grow_nb_filters:
                nb_filter += growth_rate

        if return_concat_list:
            return x, nb_filter, x_list
        else:
            return x, nb_filter
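# Editor's sketch: channel bookkeeping for the dense block above. Each of the
# `nb_layers` conv blocks contributes `growth_rate` new feature maps that are
# concatenated onto the running tensor, so the returned filter count is
# nb_filter + nb_layers * growth_rate.
def _example_dense_block_usage():
    from keras.layers import Input

    inputs = Input(shape=(32, 32, 24))
    x, nb_filter = __dense_block(inputs, nb_layers=6, nb_filter=24, growth_rate=12)
    assert nb_filter == 24 + 6 * 12  # 96 channels after the block
    return x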
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4, block_prefix=None,
                       transition_pooling='max'):
    '''
    Adds a pointwise convolution layer (with batch normalization and relu),
    and a pooling layer determined by `transition_pooling`. The number of output
    convolution filters can be reduced by appropriately reducing the
    compression parameter.

    # Arguments
        ip: input keras tensor
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        compression: calculated as 1 - reduction. Reduces the number
            of feature maps in the transition block.
        weight_decay: weight decay factor
        block_prefix: str, for unique block naming

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filter * compression, rows / 2, cols / 2)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows / 2, cols / 2, nb_filter * compression)`
        if data_format='channels_last'.

    # Returns
        a keras tensor
    '''
    with K.name_scope('Transition'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                               name=name_or_none(block_prefix, '_bn'))(ip)
        x = Activation('relu')(x)
        x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal',
                   padding='same', use_bias=False, kernel_regularizer=l2(weight_decay),
                   name=name_or_none(block_prefix, '_conv2D'))(x)
        if transition_pooling == 'avg':
            x = AveragePooling2D((2, 2), strides=(2, 2))(x)
        elif transition_pooling == 'max':
            x = MaxPooling2D((2, 2), strides=(2, 2))(x)

        return x
def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1e-4, block_prefix=None):
    '''Adds an upsampling block. The upsampling operation is determined
    by the `type` parameter.

    # Arguments
        ip: input keras tensor
        nb_filters: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        type: can be 'upsampling', 'subpixel' or 'deconv'. Determines
            the type of upsampling performed
        weight_decay: weight decay factor
        block_prefix: str, for unique block naming

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filter, rows * 2, cols * 2)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows * 2, cols * 2, nb_filter)` if data_format='channels_last'.

    # Returns
        a keras tensor
    '''
    with K.name_scope('TransitionUp'):
        if type == 'upsampling':
            x = UpSampling2D(name=name_or_none(block_prefix, '_upsampling'))(ip)
        elif type == 'subpixel':
            x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
                       kernel_regularizer=l2(weight_decay), use_bias=False,
                       kernel_initializer='he_normal',
                       name=name_or_none(block_prefix, '_conv2D'))(ip)
            x = SubPixelUpscaling(scale_factor=2,
                                  name=name_or_none(block_prefix, '_subpixel'))(x)
            # Distinct suffix avoids a layer-name collision with the conv above
            # when block_prefix is set.
            x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
                       kernel_regularizer=l2(weight_decay), use_bias=False,
                       kernel_initializer='he_normal',
                       name=name_or_none(block_prefix, '_conv2D_2'))(x)
        else:
            x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same',
                                strides=(2, 2), kernel_initializer='he_normal',
                                kernel_regularizer=l2(weight_decay),
                                name=name_or_none(block_prefix, '_conv2DT'))(ip)
        return x
def _transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
    '''Apply BatchNorm, ReLU, 1x1 Conv2D (with optional compression) and AveragePooling2D.

    Args:
        ip: keras tensor
        nb_filter: number of filters
        compression: calculated as 1 - reduction. Reduces the number of
            feature maps in the transition block.
        weight_decay: weight decay factor

    Returns:
        keras tensor, after applying batch_norm, relu-conv and average pooling
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('transition_block'):
        x = BatchNormalization(axis=concat_axis, epsilon=1e-5, momentum=0.1)(ip)
        x = Activation('relu')(x)
        x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal',
                   padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x)
        x = AveragePooling2D((2, 2), strides=(2, 2))(x)

    return x
def _add_auxiliary_head(x, classes, weight_decay, pooling, include_top):
    '''Adds an auxiliary head for training the model.

    From section A.7 "Training of ImageNet models" of the paper, all NASNet models
    are trained using an auxiliary classifier around 2/3 of the depth of the
    network, with a loss weight of 0.4.

    # Arguments
        x: input tensor
        classes: number of output classes
        weight_decay: l2 regularization weight
        pooling: optional global pooling mode ('avg' or 'max') applied
            when `include_top` is False
        include_top: whether to add the final Dense classification layer

    # Returns
        a keras Tensor
    '''
    img_height = 1 if K.image_data_format() == 'channels_last' else 2
    img_width = 2 if K.image_data_format() == 'channels_last' else 3
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('auxiliary_branch'):
        auxiliary_x = Activation('relu')(x)
        auxiliary_x = AveragePooling2D((5, 5), strides=(3, 3), padding='valid',
                                       name='aux_pool')(auxiliary_x)
        auxiliary_x = Conv2D(128, (1, 1), padding='same', use_bias=False,
                             name='aux_conv_projection', kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay))(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_projection')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        auxiliary_x = Conv2D(768, (auxiliary_x._keras_shape[img_height],
                                   auxiliary_x._keras_shape[img_width]),
                             padding='valid', use_bias=False,
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay),
                             name='aux_conv_reduction')(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_reduction')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        if include_top:
            auxiliary_x = Flatten()(auxiliary_x)
            auxiliary_x = Dense(classes, activation='softmax',
                                kernel_regularizer=l2(weight_decay),
                                name='aux_predictions')(auxiliary_x)
        else:
            if pooling == 'avg':
                auxiliary_x = GlobalAveragePooling2D()(auxiliary_x)
            elif pooling == 'max':
                auxiliary_x = GlobalMaxPooling2D()(auxiliary_x)

    return auxiliary_x
def __init__(self, weight_decay, **kwargs):
    with K.name_scope(self.__class__.__name__):
        self.weight_decay = K.variable(weight_decay, name='weight_decay')
    super(DecoupleWeightDecay, self).__init__(**kwargs)
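# Editor's sketch, assuming the usual mixin pattern for this class: composing
# the decoupled weight decay behavior with an existing optimizer. The class
# name `AdamW` is illustrative, not from the original source.
def _example_decoupled_weight_decay():
    from keras.optimizers import Adam

    class AdamW(DecoupleWeightDecay, Adam):
        # DecoupleWeightDecay consumes `weight_decay` and forwards the
        # remaining kwargs (e.g. `lr`) to the base optimizer via super().
        pass

    return AdamW(weight_decay=1e-4, lr=1e-3)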
def get_gradient_dynamic_norm(model):
    with K.name_scope('gradient_dyn_norm'):
        # Gradient norm over the "dynamic" tail of the weight list
        # (index 22 onward; this offset is model-specific).
        grads_dyn = K.gradients(model.total_loss, model.trainable_weights[22:])
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads_dyn]))
    return norm
def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None):
    '''Adjusts the input `p` to match the shape of `ip`, or to handle
    situations where the number of output filters needs to be changed.

    # Arguments
        p: input tensor which needs to be modified
        ip: input tensor whose shape needs to be matched
        filters: number of output filters to be matched
        weight_decay: l2 regularization weight
        id: string id

    # Returns
        an adjusted Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    img_dim = 2 if K.image_data_format() == 'channels_first' else -2

    with K.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p._keras_shape[img_dim] != ip._keras_shape[img_dim]:
            with K.name_scope('adjust_reduction_block_%s' % id):
                p = Activation('relu', name='adjust_relu_1_%s' % id)(p)

                p1 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_1_%s' % id)(p)
                p1 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_1_%s' % id,
                            kernel_initializer='he_normal')(p1)

                p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_2_%s' % id)(p2)
                p2 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_2_%s' % id,
                            kernel_initializer='he_normal')(p2)

                p = concatenate([p1, p2], axis=channel_dim)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)

        elif p._keras_shape[channel_dim] != filters:
            with K.name_scope('adjust_projection_block_%s' % id):
                p = Activation('relu')(p)
                p = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                           name='adjust_conv_projection_%s' % id, use_bias=False,
                           kernel_regularizer=l2(weight_decay),
                           kernel_initializer='he_normal')(p)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)
    return p
def remove_squeezable_dimensions(labels, predictions, expected_rank_diff=0, name=None):
    """Squeeze last dim if ranks differ from expected by exactly 1.

    In the common case where we expect shapes to match, `expected_rank_diff`
    defaults to 0, and we squeeze the last dimension of the larger rank if they
    differ by 1. But, for example, if `labels` contains class IDs and
    `predictions` contains 1 probability per class, we expect `predictions` to
    have 1 more dimension than `labels`, so `expected_rank_diff` would be 1. In
    this case, we'd squeeze `labels` if `rank(predictions) - rank(labels) == 0`,
    and `predictions` if `rank(predictions) - rank(labels) == 2`.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      labels: Label values, a `Tensor` whose dimensions match `predictions`.
      predictions: Predicted values, a `Tensor` of arbitrary dimensions.
      expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
      name: Name of the op.

    Returns:
      Tuple of `labels` and `predictions`, possibly with last dim squeezed.
    """
    with K.name_scope(name or 'remove_squeezable_dimensions'):
        if not isinstance(predictions, tf.RaggedTensor):
            predictions = tf.convert_to_tensor(predictions)
        if not isinstance(labels, tf.RaggedTensor):
            labels = tf.convert_to_tensor(labels)

        predictions_shape = predictions.shape
        predictions_rank = predictions_shape.ndims
        labels_shape = labels.shape
        labels_rank = labels_shape.ndims

        if (labels_rank is not None) and (predictions_rank is not None):
            # Use static rank.
            rank_diff = predictions_rank - labels_rank
            if (rank_diff == expected_rank_diff + 1 and
                    predictions_shape.dims[-1].is_compatible_with(1)):
                predictions = tf.compat.v1.squeeze(predictions, [-1])
            elif (rank_diff == expected_rank_diff - 1 and
                    labels_shape.dims[-1].is_compatible_with(1)):
                labels = tf.compat.v1.squeeze(labels, [-1])
            return labels, predictions

        # Use dynamic rank.
        rank_diff = tf.rank(predictions) - tf.rank(labels)
        if (predictions_rank is None) or (
                predictions_shape.dims[-1].is_compatible_with(1)):
            predictions = tf.compat.v1.cond(
                tf.equal(expected_rank_diff + 1, rank_diff),
                lambda: tf.compat.v1.squeeze(predictions, [-1]),
                lambda: predictions)
        if (labels_rank is None) or (
                labels_shape.dims[-1].is_compatible_with(1)):
            labels = tf.compat.v1.cond(
                tf.equal(expected_rank_diff - 1, rank_diff),
                lambda: tf.compat.v1.squeeze(labels, [-1]),
                lambda: labels)
        return labels, predictions
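# Editor's sketch: the static-rank path of remove_squeezable_dimensions above.
# With (batch, 1) labels and (batch,) predictions, rank(predictions) -
# rank(labels) == expected_rank_diff - 1, so the labels' trailing singleton
# dimension is squeezed away.
def _example_remove_squeezable_dimensions():
    import numpy as np

    labels = tf.constant(np.zeros((8, 1), dtype=np.int64))
    predictions = tf.constant(np.zeros((8,), dtype=np.float32))
    labels, predictions = remove_squeezable_dimensions(labels, predictions)
    return labels, predictions  # labels now has shape (8,)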
def vgg16(config, fake, real, layers):
    """Builds a frozen VGG16 feature extractor over the `fake` and `real` image
    streams and returns the (output, ground-truth) activation pairs for each
    requested entry in `layers` (e.g. "relu11", "relu22", "fc2")."""
    features = []
    parameters = []

    with K.name_scope("VGG16"):
        # Preprocess: resize to 224x224, rescale to [0, 255], subtract the ImageNet mean.
        mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                           shape=[1, 1, 1, 3], name="img_mean")
        fake = tf.image.resize_images(fake, size=[224, 224]) * 255.0 - mean
        real = tf.image.resize_images(real, size=[224, 224]) * 255.0 - mean

        def conv_layer(x, x_gt, in_ch, out_ch, tag):
            """One frozen 3x3 conv + ReLU applied to both streams."""
            w = tf.Variable(tf.truncated_normal([3, 3, in_ch, out_ch], dtype=tf.float32,
                                                stddev=1e-1),
                            trainable=False, name="weights")
            b = tf.Variable(tf.constant(0.0, shape=[out_ch], dtype=tf.float32),
                            trainable=False, name="biases")
            parameters.extend([w, b])
            # Output
            y = tf.nn.relu(tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') + b)
            # Ground-Truth
            y_gt = tf.nn.relu(tf.nn.conv2d(x_gt, w, strides=[1, 1, 1, 1], padding='SAME') + b)
            # Loss features
            if tag in layers:
                features.extend([y, y_gt])
            return y, y_gt

        # Thirteen convolutions in five blocks, each block followed by max pooling,
        # exactly as in the standard VGG16 layout (64-64, 128-128, 256x3, 512x3, 512x3).
        x, x_gt = fake, real
        in_ch = 3
        for block, widths in enumerate([[64, 64], [128, 128], [256, 256, 256],
                                        [512, 512, 512], [512, 512, 512]], start=1):
            for conv, out_ch in enumerate(widths, start=1):
                x, x_gt = conv_layer(x, x_gt, in_ch, out_ch, "relu%d%d" % (block, conv))
                in_ch = out_ch
            x = tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name="pool%d" % block)
            x_gt = tf.nn.max_pool(x_gt, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                  padding='SAME', name="pool%d_gt" % block)

        # FC Parameters: flatten the final pooled maps.
        shape = int(np.prod(x.get_shape()[1:]))
        x = tf.reshape(x, [-1, shape])
        x_gt = tf.reshape(x_gt, [-1, shape])

        def fc_layer(x, x_gt, in_dim, out_dim, tag, relu=True):
            """One frozen fully connected layer applied to both streams."""
            w = tf.Variable(tf.truncated_normal([in_dim, out_dim], dtype=tf.float32,
                                                stddev=1e-1),
                            trainable=False, name="weights")
            b = tf.Variable(tf.constant(1.0, shape=[out_dim], dtype=tf.float32),
                            trainable=False, name="biases")
            parameters.extend([w, b])
            y = tf.matmul(x, w) + b
            y_gt = tf.matmul(x_gt, w) + b
            if relu:
                y, y_gt = tf.nn.relu(y), tf.nn.relu(y_gt)
            if tag in layers:
                features.extend([y, y_gt])
            return y, y_gt

        x, x_gt = fc_layer(x, x_gt, shape, 4096, "fc1")
        x, x_gt = fc_layer(x, x_gt, 4096, 4096, "fc2")
        fc_layer(x, x_gt, 4096, 1000, "fc3", relu=False)  # no ReLU on the logits

    # Load pretrained VGG16 weights into the frozen variables (creation order matters).
    load_weights(config, parameters)

    return features
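# Editor's sketch: a perceptual loss built from the feature pairs returned by
# vgg16() above. The `features` list alternates [output, ground-truth] per
# requested layer, so consecutive pairs are compared; the layer names and the
# squared-error reduction are illustrative choices.
def _example_perceptual_loss(config, fake, real):
    feats = vgg16(config, fake, real, layers=["relu22", "relu32"])
    loss = 0.0
    for out, gt in zip(feats[0::2], feats[1::2]):
        loss += tf.reduce_mean(tf.square(out - gt))
    return loss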
def _call(self, inputs, **kwargs):
    if self.proto_number == self.capsule_number:
        return inputs
    else:
        signals = inputs[0]
        diss = inputs[1]

        signal_shape = mixed_shape(signals)

        if self.use_for_loop:
            diss_stack = []
            signals_stack = []
            sub_idx = None
            with K.name_scope('for_loop'):
                for p in self._proto_distrib:
                    with K.name_scope('compute_slices'):
                        diss_ = diss[:, p[0]:(p[-1] + 1)]
                        signals_ = K.reshape(signals[:, p[0]:(p[-1] + 1), :],
                                             [signal_shape[0] * len(p)]
                                             + list(signal_shape[2:]))
                    with K.name_scope('competition'):
                        if len(p) > 1:
                            with K.name_scope('competition_indices'):
                                argmin_idx = K.argmin(diss_, axis=-1)
                                if sub_idx is None:
                                    sub_idx = K.arange(0, signal_shape[0],
                                                       dtype=argmin_idx.dtype)
                                argmin_idx = argmin_idx + len(p) * sub_idx

                            with K.name_scope('dissimilarity_competition'):
                                diss_stack.append(K.expand_dims(
                                    K.gather(K.flatten(diss_), argmin_idx), -1))

                            with K.name_scope('signal_competition'):
                                signals_stack.append(K.gather(signals_, argmin_idx))
                        else:
                            diss_stack.append(diss_)
                            signals_stack.append(signals_)

            diss = K.concatenate(diss_stack, 1)

            with K.name_scope('signal_concatenation'):
                signals = K.concatenate(signals_stack, 1)
                signals = K.reshape(signals, [signal_shape[0], self.capsule_number]
                                    + list(signal_shape[2:]))

        else:
            with K.name_scope('dissimilarity_preprocessing'):
                # extend if it is not equally distributed
                if not self._equally_distributed:
                    # permute so that the first dimension is the prototype (protos x batch)
                    diss = K.permute_dimensions(diss, [1, 0])
                    # gather regarding extension (preparing for reshape to block)
                    diss = K.gather(diss, self._proto_extension)
                    # permute back
                    diss = K.permute_dimensions(diss, [1, 0])

                # reshape to block form
                diss = K.reshape(diss, [signal_shape[0] * self.capsule_number,
                                        self._max_proto_number_in_capsule])

            with K.name_scope('competition_indices'):
                # get minimal idx in each class and batch for element selection
                # in diss and signals
                argmin_idx = K.argmin(diss, axis=-1)
                argmin_idx = argmin_idx + self._max_proto_number_in_capsule * \
                    K.arange(0, signal_shape[0] * self.capsule_number,
                             dtype=argmin_idx.dtype)

            with K.name_scope('dissimilarity_competition'):
                # get minimal values in the form (batch x capsule)
                diss = K.gather(K.flatten(diss), argmin_idx)
                diss = K.reshape(diss, [signal_shape[0], self.capsule_number])

            with K.name_scope('signal_preprocessing'):
                # apply the same steps as above for signals; get signals from
                # (batch x protos x dim1 x ... x dimN)
                # to (batch x capsule x dim1 x ... x dimN)
                if not self._equally_distributed:
                    signals = K.permute_dimensions(
                        signals, [1, 0] + list(range(2, len(signal_shape))))
                    signals = K.gather(signals, self._proto_extension)
                    signals = K.permute_dimensions(
                        signals, [1, 0] + list(range(2, len(signal_shape))))

                signals = K.reshape(signals,
                                    [signal_shape[0] * self.capsule_number
                                     * self._max_proto_number_in_capsule]
                                    + list(signal_shape[2:]))

            with K.name_scope('signal_competition'):
                signals = K.gather(signals, argmin_idx)
                signals = K.reshape(signals, [signal_shape[0], self.capsule_number]
                                    + list(signal_shape[2:]))

        return {0: signals, 1: diss}
def _normal_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper).

    # Arguments
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id

    # Returns
        a Keras tensor
    '''
    global NORMAL_IDX

    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

    weights = load_normal_call(NORMAL_IDX)
    NORMAL_IDX += 1

    with K.name_scope('normal_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id, weights)

        h = Activation('relu')(ip)
        h = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                   name='normal_conv_1_%s' % id, use_bias=False,
                   kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay),
                   weights=[weights['begin_W']])(h)
        h = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON,
                               name='normal_bn_1_%s' % id, weights=weights['begin_bn'])(h)

        with K.name_scope('block_1'):
            x1_1 = _separable_conv_block(h, filters, kernel_size=(5, 5),
                                         weight_decay=weight_decay,
                                         id='normal_left1_%s' % id,
                                         weights=weights['left_0'])
            x1_2 = _separable_conv_block(p, filters, weight_decay=weight_decay,
                                         id='normal_right1_%s' % id,
                                         weights=weights['right_0'])
            x1 = add([x1_1, x1_2], name='normal_add_1_%s' % id)

        with K.name_scope('block_2'):
            x2_1 = _separable_conv_block(p, filters, (5, 5), weight_decay=weight_decay,
                                         id='normal_left2_%s' % id,
                                         weights=weights['left_1'])
            x2_2 = _separable_conv_block(p, filters, (3, 3), weight_decay=weight_decay,
                                         id='normal_right2_%s' % id,
                                         weights=weights['right_1'])
            x2 = add([x2_1, x2_2], name='normal_add_2_%s' % id)

        with K.name_scope('block_3'):
            x3 = AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                  name='normal_left3_%s' % id)(h)
            x3 = add([x3, p], name='normal_add_3_%s' % id)

        with K.name_scope('block_4'):
            x4_1 = AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                    name='normal_left4_%s' % id)(p)
            x4_2 = AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                    name='normal_right4_%s' % id)(p)
            x4 = add([x4_1, x4_2], name='normal_add_4_%s' % id)

        with K.name_scope('block_5'):
            x5 = _separable_conv_block(h, filters, weight_decay=weight_decay,
                                       id='normal_left5_%s' % id,
                                       weights=weights['left_4'])
            x5 = add([x5, h], name='normal_add_5_%s' % id)

        x = concatenate([p, x1, x2, x3, x4, x5], axis=channel_dim,
                        name='normal_concat_%s' % id)
    return x, ip
def _define_generator_loss(self, logits):
    with K.name_scope('G_loss'):
        return losses.l1_distance(self.fake_images, logits)
def _call(self, inputs, **kwargs):
    if self.proto_number == self.capsule_number:
        return inputs
    else:
        signals = inputs[0]
        diss = inputs[1]
        signal_shape = None

        # signal.shape: (batch, proto_num, caps_dim1, ..., caps_dimN)
        if self.input_spec[0].ndim > 3:
            signal_shape = mixed_shape(signals)
            signals = K.reshape(signals, signal_shape[0:2] + (-1,))

        if not self._equally_distributed:
            if self.use_for_loop:
                signals_stack = []
                diss_stack = []
                with K.name_scope('for_loop'):
                    for i, p in enumerate(self._proto_distrib):
                        with K.name_scope('compute_slices'):
                            diss_ = diss[:, p[0]:(p[-1] + 1)]
                            signals_ = signals[:, p[0]:(p[-1] + 1), :]

                        if len(p) > 1:
                            with K.name_scope('competition_probabilities'):
                                coefficients = prob_trans.NegSoftmax(
                                    axis=-1, max_stabilization=True)(diss_ * self.beta[i])

                            with K.name_scope('signal_competition'):
                                signals_stack.append(K.expand_dims(
                                    K.batch_dot(coefficients, signals_, [1, 1]), 1))

                            with K.name_scope('dissimilarity_competition'):
                                diss_stack.append(K.batch_dot(coefficients, diss_, [1, 1]))
                        else:
                            signals_stack.append(signals_)
                            diss_stack.append(diss_)

                signals = K.concatenate(signals_stack, axis=1)
                diss = K.concatenate(diss_stack, axis=-1)
            else:
                extension_idx = []
                for i in self._proto_extension:
                    if i not in extension_idx:
                        extension_idx.append(i)
                    else:
                        extension_idx.append(max(self._proto_extension) + 1)

                batch_size = K.shape(signals)[0] if signal_shape is None \
                    else signal_shape[0]

                # reshape to block
                with K.name_scope('competition_probabilities'):
                    with K.name_scope('neg_softmax'):
                        with K.name_scope('coefficients'):
                            beta = K.gather(self.beta, self._capsule_extension)
                            coefficients = -diss * beta

                        # max stabilization
                        coefficients = coefficients - K.max(coefficients, axis=-1,
                                                            keepdims=True)
                        coefficients = K.exp(coefficients)
                        coefficients = K.concatenate(
                            [coefficients, K.zeros_like(coefficients[:, 0:1])], axis=-1)
                        coefficients = K.transpose(coefficients)
                        coefficients = K.gather(coefficients, extension_idx)
                        coefficients = K.transpose(coefficients)
                        coefficients = K.reshape(
                            coefficients, [batch_size, self.capsule_number,
                                           self._max_proto_number_in_capsule])

                    # could never be a zero division
                    with K.name_scope('normalization_constant'):
                        constant = K.sum(coefficients, axis=-1, keepdims=True)

                    probs = coefficients / constant

                with K.name_scope('dissimilarity_preprocessing'):
                    diss = K.transpose(diss)
                    diss = K.gather(diss, self._proto_extension)
                    diss = K.transpose(diss)
                    diss = K.reshape(diss, [batch_size, self.capsule_number,
                                            self._max_proto_number_in_capsule])

                with K.name_scope('dissimilarity_competition'):
                    diss = K.squeeze(K.batch_dot(probs, K.expand_dims(diss), [2, 2]), -1)

                with K.name_scope('signal_preprocessing'):
                    signals = K.permute_dimensions(signals, [1, 0, 2])
                    signals = K.gather(signals, self._proto_extension)
                    signals = K.permute_dimensions(signals, [1, 0, 2])
                    signals = K.reshape(signals, [batch_size, self.capsule_number,
                                                  self._max_proto_number_in_capsule, -1])

                with K.name_scope('signal_competition'):
                    signals = K.batch_dot(probs, signals, [2, 2])
        else:
            batch_size = K.shape(signals)[0] if signal_shape is None else signal_shape[0]
            diss = K.reshape(diss, [batch_size, self.capsule_number,
                                    self._max_proto_number_in_capsule])

            with K.name_scope('competition_probabilities'):
                coefficients = prob_trans.NegSoftmax(axis=-1, max_stabilization=True)(
                    diss * K.expand_dims(self.beta, -1))

            with K.name_scope('signal_competition'):
                signals = K.reshape(signals, [batch_size, self.capsule_number,
                                              self._max_proto_number_in_capsule, -1])
                signals = K.batch_dot(coefficients, signals, [2, 2])

            with K.name_scope('dissimilarity_competition'):
                diss = K.squeeze(K.batch_dot(coefficients, K.expand_dims(diss), [2, 2]), -1)

        if self.input_spec[0].ndim > 3:
            signals = K.reshape(signals, [signal_shape[0], self.capsule_number]
                                + list(signal_shape[2:]))

        return {0: signals, 1: diss}
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
    """Replicates a model on different GPUs.

    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:

    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.

    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.

    This induces quasi-linear speedup on up to 8 GPUs.

    This function is only available with the TensorFlow backend
    for the time being.

    Args:
      model: A Keras model instance. To avoid OOM errors,
        this model could have been built on CPU, for instance
        (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
        model replicas.
      cpu_merge: A boolean value to identify whether to force
        merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
        create the model's weights under the scope of the CPU.
        If the model is not defined under any preceding device
        scope, you can still rescue it by activating this option.

    Returns:
      A Keras `Model` instance which can be used just like the
      initial `model` argument, but which distributes its workload
      on multiple GPUs.

    Example 1: Training models with weights merge on CPU

    ```python
    import tensorflow as tf
    from keras.applications import Xception
    from keras.utils import multi_gpu_model
    import numpy as np

    num_samples = 1000
    height = 224
    width = 224
    num_classes = 1000

    # Instantiate the base model (or "template" model).
    # We recommend doing this with a CPU device scope,
    # so that the model's weights are hosted on CPU memory.
    # Otherwise they may end up hosted on a GPU, which would
    # complicate weight sharing.
    with tf.device('/cpu:0'):
        model = Xception(weights=None,
                         input_shape=(height, width, 3),
                         classes=num_classes)

    # Replicates the model on 8 GPUs.
    # This assumes that your machine has 8 available GPUs.
    parallel_model = multi_gpu_model(model, gpus=8)
    parallel_model.compile(loss='categorical_crossentropy',
                           optimizer='rmsprop')

    # Generate dummy data.
    x = np.random.random((num_samples, height, width, 3))
    y = np.random.random((num_samples, num_classes))

    # This `fit` call will be distributed on 8 GPUs.
    # Since the batch size is 256, each GPU will process 32 samples.
    parallel_model.fit(x, y, epochs=20, batch_size=256)

    # Save model via the template model (which shares the same weights):
    model.save('my_model.h5')
    ```

    Example 2: Training models with weights merge on CPU using cpu_relocation

    ```python
    ..
    # Not needed to change the device scope for model definition:
    model = Xception(weights=None, ..)

    try:
        model = multi_gpu_model(model, cpu_relocation=True)
        print("Training using multiple GPUs..")
    except:
        print("Training using single GPU or CPU..")

    model.compile(..)
    ..
    ```

    Example 3: Training models with weights merge on GPU (recommended for NV-link)

    ```python
    ..
    # Not needed to change the device scope for model definition:
    model = Xception(weights=None, ..)

    try:
        model = multi_gpu_model(model, cpu_merge=False)
        print("Training using multiple GPUs..")
    except:
        print("Training using single GPU or CPU..")

    model.compile(..)
    ..
    ```

    Raises:
      ValueError: if the `gpus` argument does not match available devices.
    """
    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `len(gpus) >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `gpus >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    available_devices = _get_available_devices()
    available_devices = [_normalize_device_name(name) for name in available_devices]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                             'we expect the following devices to be available: %s. '
                             'However this machine only has: %s. '
                             'Try reducing `gpus`.'
                             % (gpus, target_devices, available_devices))

    def get_slice(data, i, parts):
        """Slice an array into `parts` slices and return slice `i`.

        Args:
          data: array to slice.
          i: index of slice to return.
          parts: number of slices to make.

        Returns:
          Slice `i` of `data`.
        """
        shape = tf.compat.v1.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == parts - 1:
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # Relocate the model definition under CPU device scope if needed
    if cpu_relocation:
        from keras.models import clone_model  # pylint: disable=g-import-not-at-top
        with tf.compat.v1.device('/cpu:0'):
            model = clone_model(model)

    all_outputs = [[] for _ in range(len(model.outputs))]

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.compat.v1.device('/gpu:%d' % gpu_id):
            with backend.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.shape.as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i, 'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o, output in enumerate(outputs):
                    all_outputs[o].append(output)

    # Deduplicate output names to handle Siamese networks.
    occurrences = {}
    for n in model.output_names:
        if n not in occurrences:
            occurrences[n] = 1
        else:
            occurrences[n] += 1
    conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
    output_names = []
    for n in model.output_names:
        if n in conflict_counter:
            conflict_counter[n] += 1
            n += '_%d' % conflict_counter[n]
        output_names.append(n)

    # Merge outputs under expected scope.
    with tf.compat.v1.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
        merged = []
        for name, outputs in zip(output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)
def gradient_norm(model):
    with K.name_scope('gradient_norm'):
        grads = K.gradients(model.total_loss, model.trainable_weights)
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
    return norm
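# Editor's sketch: exposing the gradient norm above as a training-time metric.
# This relies on the `metrics_tensors` hook of older, graph-mode Keras
# (pre-2.3); call it after model.compile().
def _example_monitor_gradient_norm(model):
    model.metrics_names.append('gradient_norm')
    model.metrics_tensors.append(gradient_norm(model))
    return model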
def build(self, input_shape):
    with backend.name_scope(self.forward_layer.name):
        self.forward_layer.build(input_shape)
    with backend.name_scope(self.backward_layer.name):
        self.backward_layer.build(input_shape)
    self.built = True
def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3,
                       growth_rate=12, nb_filter=-1, nb_layers_per_block=-1,
                       bottleneck=False, reduction=0.0, dropout_rate=None,
                       weight_decay=1e-4, subsample_initial_block=False, pooling=None,
                       activation='sigmoid', transition_pooling='avg'):
    ''' Build the DenseNet model.

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
        include_top: flag to include the final Dense layer
        depth: number of layers
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. Default -1 indicates initial number
            of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
            Can be a -1, positive integer or a list.
            If -1, calculates nb_layer_per_block from the depth of the network.
            If positive integer, a set number of layers per dense block.
            If list, nb_layer is used as provided. Note that the list size
            must equal nb_dense_block.
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks. Note: the reduction
            value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Changes model type to suit different datasets.
            Should be set to True for ImageNet, and False for CIFAR datasets.
            When set to True, the initial convolution will be strided and a
            MaxPooling2D will be added before the initial dense block.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        activation: Type of activation at the top layer. Can be one of 'softmax'
            or 'sigmoid'. Note that if sigmoid is used, classes must be 1.
        transition_pooling: `avg` for avg pooling (default), `max` for max pooling,
            None for no pooling during scale transition blocks. Please note that this
            default differs from the DenseNetFCN paper in accordance with the DenseNet
            paper.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`
            or `nb_dense_block`
    '''
    with K.name_scope('DenseNet'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # layers in each dense block
        if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != nb_dense_block:
                raise ValueError('If `nb_layers_per_block` is a list, its length '
                                 'must equal `nb_dense_block`.')

            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (depth - 4) % 3 == 0, ('Depth must be 3 N + 4 '
                                              'if nb_layers_per_block == -1')
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept the user's initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = (7, 7)
            initial_strides = (2, 2)
        else:
            initial_kernel = (3, 3)
            initial_strides = (1, 1)

        x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal',
                   padding='same', name='initial_conv2D', strides=initial_strides,
                   use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)

        if subsample_initial_block:
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x)
            x = Activation('relu')(x)
            x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate,
                                         bottleneck=bottleneck, dropout_rate=dropout_rate,
                                         weight_decay=weight_decay,
                                         block_prefix='dense_%i' % block_idx)
            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression,
                                   weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx,
                                   transition_pooling=transition_pooling)
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_block
        x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate,
                                     bottleneck=bottleneck, dropout_rate=dropout_rate,
                                     weight_decay=weight_decay,
                                     block_prefix='dense_%i' % (nb_dense_block - 1))

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='final_bn')(x)
        x = Activation('relu')(x)

        if include_top:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)
            x = Dense(nb_classes, activation=activation)(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        return x
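# Editor's sketch: building a DenseNet-BC style classifier with the helper
# above. With depth=40 and bottleneck=True, each of the 3 dense blocks gets
# ((40 - 4) / 3) // 2 = 6 conv blocks.
def _example_create_dense_net():
    from keras.layers import Input
    from keras.models import Model

    img_input = Input(shape=(32, 32, 3))
    x = __create_dense_net(nb_classes=10, img_input=img_input, include_top=True,
                           depth=40, nb_dense_block=3, growth_rate=12,
                           bottleneck=True, reduction=0.5, activation='softmax')
    return Model(img_input, x)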
def _call(self, inputs, **kwargs):
    # inverse permutation
    def inv_perm(perm):
        inverse = [0] * len(perm)
        for i, p in enumerate(perm):
            inverse[p] = i
        return inverse

    # signal is dict: extract signal from diss
    signals = inputs[0]
    signal_shape = mixed_shape(signals)
    ndim = self.input_spec[0].ndim

    atom_axes = list(range(3, ndim))
    atom_axes.remove(self.axis)

    if self.scope == 'local':
        with K.name_scope('signal_preprocessing'):
            perm = [1, 2, self.axis, 0] + atom_axes
            signals = K.permute_dimensions(signals, perm)
            if ndim > 4:
                signals = K.reshape(signals, [signal_shape[1], signal_shape[2],
                                              signal_shape[self.axis], -1])

        with K.name_scope('linear_mapping'):
            # multiply over all batches by using the Theano behavior
            signals = K.batch_dot(self.linear_maps, signals, axes=[2, 2])

        with K.name_scope('signal_postprocessing'):
            if ndim > 4:
                signals = K.reshape(signals, [signal_shape[1], signal_shape[2],
                                              self.output_dim, signal_shape[0]]
                                    + [signal_shape[i] for i in atom_axes])
            signals = K.permute_dimensions(signals, inv_perm(perm))

    elif self.scope == 'global':
        with K.name_scope('signal_preprocessing'):
            dims = list(range(ndim))
            dims.remove(self.axis)
            perm = dims + [self.axis]
            signals = K.permute_dimensions(signals, perm)

        with K.name_scope('linear_mapping'):
            signals = K.dot(signals, self.linear_maps)

        with K.name_scope('signal_postprocessing'):
            signals = K.permute_dimensions(signals, inv_perm(perm))

    elif self.scope == 'channel_wise':
        with K.name_scope('signal_preprocessing'):
            perm = [2, self.axis, 0, 1] + atom_axes
            signals = K.permute_dimensions(signals, perm)
            signals = K.reshape(signals, [signal_shape[2], signal_shape[self.axis], -1])

        with K.name_scope('linear_mapping'):
            # multiply over all batches by using the Theano behavior
            signals = K.batch_dot(self.linear_maps, signals, axes=[1, 1])

        with K.name_scope('signal_postprocessing'):
            signals = K.reshape(signals, [signal_shape[2], self.output_dim,
                                          signal_shape[0], signal_shape[1]]
                                + [signal_shape[i] for i in atom_axes])
            signals = K.permute_dimensions(signals, inv_perm(perm))

    else:  # capsule_wise
        with K.name_scope('signal_preprocessing'):
            perm = [1, self.axis, 0, 2] + atom_axes
            signals = K.permute_dimensions(signals, perm)
            signals = K.reshape(signals, [signal_shape[1], signal_shape[self.axis], -1])

        with K.name_scope('linear_mapping'):
            # multiply over all batches by using the Theano behavior
            signals = K.batch_dot(self.linear_maps, signals, axes=[1, 1])

        with K.name_scope('signal_postprocessing'):
            signals = K.reshape(signals, [signal_shape[1], self.output_dim,
                                          signal_shape[0], signal_shape[2]]
                                + [signal_shape[i] for i in atom_axes])
            signals = K.permute_dimensions(signals, inv_perm(perm))

    inputs[0] = signals
    return inputs
def _reduction_a_cell(ip, p, filters, block_id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

    # Arguments
        ip: Input tensor `x`
        p: Input tensor `p`
        filters: Number of output filters
        block_id: String block_id

    # Returns
        A Keras tensor
    '''
    # `weight_decay`, `use_bn`, `bn_momentum` and `drop_p` are assumed to be
    # module-level settings in the file this snippet was taken from.
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('reduction_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                          kernel_regularizer=l2(weight_decay),
                          name='reduction_conv_1_%s' % block_id,
                          use_bias=False, kernel_initializer='he_normal')(h)
        if use_bn:
            h = layers.BatchNormalization(axis=channel_dim, momentum=bn_momentum,
                                          epsilon=1e-3,
                                          name='reduction_bn_1_%s' % block_id)(h)
        h = layers.SpatialDropout2D(drop_p)(h)
        h3 = layers.ZeroPadding2D(padding=correct_pad(backend, h, 3),
                                  name='reduction_pad_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2),
                                         block_id='reduction_left1_%s' % block_id)
            x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2),
                                         block_id='reduction_right1_%s' % block_id)
            x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='valid',
                                       name='reduction_left2_%s' % block_id)(h3)
            x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2),
                                         block_id='reduction_right2_%s' % block_id)
            x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3_1 = layers.AveragePooling2D((3, 3), strides=(2, 2), padding='valid',
                                           name='reduction_left3_%s' % block_id)(h3)
            x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2),
                                         block_id='reduction_right3_%s' % block_id)
            x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4 = layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                         name='reduction_left4_%s' % block_id)(x1)
            x4 = layers.add([x2, x4])

        with backend.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1, filters, (3, 3),
                                         block_id='reduction_left4_%s' % block_id)
            x5_2 = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='valid',
                                       name='reduction_right5_%s' % block_id)(h3)
            x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

        x = layers.concatenate([x2, x3, x4, x5], axis=channel_dim,
                               name='reduction_concat_%s' % block_id)
        return x, ip
def __init__(self, standard_deviation=0.3, **kwargs):
    super(NoisyOptimizer, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.standard_deviation = K.variable(standard_deviation,
                                             name='standard_deviation')
def _num_elements(losses):
    """Computes the number of elements in `losses` tensor."""
    with K.name_scope('num_elements') as scope:
        return tf.cast(tf.compat.v1.size(losses, name=scope), dtype=losses.dtype)
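# Editor's sketch: the typical use of a helper like _num_elements, turning a
# summed loss into a mean without relying on a static shape.
def _example_mean_loss(losses):
    return tf.reduce_sum(losses) / _num_elements(losses)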
def NASNet(input_shape=None, penultimate_filters=4032, nb_blocks=6, stem_filters=96, skip_reduction=True, use_auxilary_branch=False, filters_multiplier=2, dropout=0.5, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000, default_size=None): """Instantiates a NASNet architecture. Note that only TensorFlow is supported for now, therefore it only works with the data format `image_data_format='channels_last'` in your Keras config at `~/.keras/keras.json`. # Arguments input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(331, 331, 3)` for NASNetLarge or `(224, 224, 3)` for NASNetMobile It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. penultimate_filters: number of filters in the penultimate layer. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters nb_blocks: number of repeated blocks of the NASNet model. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters stem_filters: number of filters in the initial stem block skip_reduction: Whether to skip the reduction step at the tail end of the network. Set to `False` for CIFAR models. use_auxilary_branch: Whether to use the auxilary branch during training or evaluation. filters_multiplier: controls the width of the network. - If `filters_multiplier` < 1.0, proportionally decreases the number of filters in each layer. - If `filters_multiplier` > 1.0, proportionally increases the number of filters in each layer. - If `filters_multiplier` = 1, default number of filters from the paper are used at each layer. dropout: dropout rate include_top: whether to include the fully-connected layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. default_size: specifies the default image size of the model # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. RuntimeError: If attempting to run this model with a backend that does not support separable convolutions. """ if K.backend() != 'tensorflow': raise RuntimeError('Only Tensorflow backend is currently supported, ' 'as other backends do not support ' 'separable convolution.') if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as ImageNet with `include_top` ' 'as true, `classes` should be 1000') if default_size is None: default_size = 331 # Determine proper input shape and default size. 
    input_shape = _obtain_input_shape(input_shape, default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top or weights)

    if K.image_data_format() != 'channels_last':
        warnings.warn('The NASNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    assert penultimate_filters % 24 == 0, \
        "`penultimate_filters` must be divisible by 24."

    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    filters = penultimate_filters // 24

    x = Conv2D(stem_filters, (3, 3), strides=(2, 2), padding='valid',
               use_bias=False, name='stem_conv1',
               kernel_initializer='he_normal')(img_input)
    x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                           epsilon=_BN_EPSILON, name='stem_bn1')(x)

    x, p = _reduction_A(x, None, filters // (filters_multiplier ** 2), id='stem_1')
    x, p = _reduction_A(x, p, filters // filters_multiplier, id='stem_2')

    for i in range(nb_blocks):
        x, p = _normal_A(x, p, filters, id='%d' % (i))

    x, p0 = _reduction_A(x, p, filters * filters_multiplier,
                         id='reduce_%d' % (nb_blocks))
    p = p0 if not skip_reduction else p

    for i in range(nb_blocks):
        x, p = _normal_A(x, p, filters * filters_multiplier,
                         id='%d' % (nb_blocks + i + 1))

    auxiliary_x = None
    if use_auxiliary_branch:
        img_height = 1 if K.image_data_format() == 'channels_first' else 2
        img_width = 2 if K.image_data_format() == 'channels_first' else 3

        with K.name_scope('auxiliary_branch'):
            auxiliary_x = Activation('relu')(x)
            auxiliary_x = AveragePooling2D((5, 5), strides=(3, 3),
                                           padding='valid',
                                           name='aux_pool')(auxiliary_x)
            auxiliary_x = Conv2D(128, (1, 1), padding='same', use_bias=False,
                                 name='aux_conv_projection',
                                 kernel_initializer='he_normal')(auxiliary_x)
            auxiliary_x = BatchNormalization(axis=channel_dim,
                                             momentum=_BN_DECAY,
                                             epsilon=_BN_EPSILON,
                                             name='aux_bn_projection')(auxiliary_x)
            auxiliary_x = Activation('relu')(auxiliary_x)
            auxiliary_x = Conv2D(768, (auxiliary_x._keras_shape[img_height],
                                       auxiliary_x._keras_shape[img_width]),
                                 padding='valid', use_bias=False,
                                 kernel_initializer='he_normal',
                                 name='aux_conv_reduction')(auxiliary_x)
            auxiliary_x = BatchNormalization(axis=channel_dim,
                                             momentum=_BN_DECAY,
                                             epsilon=_BN_EPSILON,
                                             name='aux_bn_reduction')(auxiliary_x)
            auxiliary_x = Activation('relu')(auxiliary_x)
            auxiliary_x = GlobalAveragePooling2D()(auxiliary_x)
            auxiliary_x = Dense(classes, activation='softmax',
                                name='aux_predictions')(auxiliary_x)

    x, p0 = _reduction_A(x, p, filters * filters_multiplier ** 2,
                         id='reduce_%d' % (2 * nb_blocks))
    p = p0 if not skip_reduction else p

    for i in range(nb_blocks):
        x, p = _normal_A(x, p, filters * filters_multiplier ** 2,
                         id='%d' % (2 * nb_blocks + i + 1))

    x = Activation('relu')(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dropout(dropout)(x)
        x = Dense(classes, activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    if use_auxiliary_branch:
        model = Model(inputs, [x, auxiliary_x], name='NASNet_with_auxiliary')
    else:
        model = Model(inputs, x, name='NASNet')

    # Load weights (when available).
    warnings.warn('Weights of NASNet models have not been ported yet for Keras.')

    if old_data_format:
        K.set_image_data_format(old_data_format)

    return model
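# A hedged usage sketch for the `NASNet` builder above: a mobile-style
# configuration, `NASNet (4 @ 1056)` in the docstring's notation. The stem and
# reduction settings here are illustrative assumptions, not a verified reference
# configuration, and the weights are random since pretrained weights have not
# been ported. Assumes a TensorFlow backend with
# image_data_format='channels_last'.
model = NASNet(input_shape=(224, 224, 3),
               penultimate_filters=1056,   # P in `NASNet (N @ P)`, divisible by 24
               nb_blocks=4,                # N in `NASNet (N @ P)`
               stem_filters=32,
               skip_reduction=False,
               use_auxiliary_branch=False,
               include_top=True,
               weights=None,               # pretrained weights are unavailable
               classes=1000,
               default_size=224)
model.summary()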
def __call__(self, inputs, **kwargs):
    if isinstance(inputs, list):
        inputs = inputs[:]
    with K.name_scope(self.name):
        # Raise exceptions in case the input is not compatible
        # with the input_spec specified in the layer constructor.
        self.assert_input_compatibility(inputs)

        # Handle layer building (weight creation, input spec locking).
        if not self.built:
            self.build(inputs)
            self.built = True

        # Handle mask propagation.
        previous_mask = _collect_previous_mask(inputs)
        user_kwargs = copy.copy(kwargs)
        if not _is_all_none(previous_mask):
            # The previous layer generated a mask.
            if has_arg(self.call, 'mask'):
                if 'mask' not in kwargs:
                    # If a mask is explicitly passed to __call__,
                    # it overrides this default mask.
                    kwargs['mask'] = previous_mask

        # Handle automatic shape inference (only useful for Theano).
        input_shape = _collect_input_shape(inputs)

        # Actually call the layer, collecting output(s), mask(s), and shape(s).
        output = self.call(inputs, **kwargs)
        output_mask = self.compute_mask(inputs, previous_mask)

        # If the layer returns tensors from its inputs unmodified,
        # we copy them to avoid loss of tensor metadata.
        output_ls = _to_list(output)
        inputs_ls = _to_list(inputs)
        output_ls_copy = []
        for x in output_ls:
            if x in inputs_ls:
                x = K.identity(x)
            output_ls_copy.append(x)
        if len(output_ls_copy) == 1:
            output = output_ls_copy[0]
        else:
            output = output_ls_copy

        # Inferring the output shape is only relevant for Theano.
        if all([s is not None for s in _to_list(input_shape)]):
            output_shape = self.compute_output_shape(input_shape)
        else:
            if isinstance(input_shape, list):
                output_shape = [None for _ in input_shape]
            else:
                output_shape = None

        if not isinstance(output_mask, (list, tuple)) and len(output_ls) > 1:
            # Augment the mask to match the length of the output.
            output_mask = [output_mask] * len(output_ls)

        # Add an inbound node to the layer, so that it keeps track
        # of the call and of all new variables created during the call.
        # This also updates the layer history of the output tensor(s).
        # If the input tensor(s) had no previous Keras history,
        # this does nothing.
        self._add_inbound_node(input_tensors=inputs, output_tensors=output,
                               input_masks=previous_mask,
                               output_masks=output_mask,
                               input_shapes=input_shape,
                               output_shapes=output_shape,
                               arguments=user_kwargs)

        # Apply activity regularizer if any:
        if (hasattr(self, 'activity_regularizer')
                and self.activity_regularizer is not None):
            regularization_losses = [
                self.activity_regularizer(x) for x in _to_list(output)
            ]
            self.add_loss(regularization_losses, _to_list(inputs))
    return output
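# To make the control flow of `__call__` above concrete, a minimal hedged
# sketch: calling a layer on a Keras tensor routes through `__call__`, which
# triggers `build` on first use, invokes `call`, and records the inbound node
# so the functional graph stays connected. `Scale` is a hypothetical layer
# written only for this illustration; `Layer` and `Input` are assumed imported.
class Scale(Layer):
    def build(self, input_shape):
        # Created lazily by __call__ on first use ("weight creation").
        self.alpha = self.add_weight(name='alpha', shape=(),
                                     initializer='ones', trainable=True)
        super(Scale, self).build(input_shape)

    def call(self, inputs):
        return self.alpha * inputs

inp = Input(shape=(8,))
out = Scale()(inp)  # __call__ builds the layer, runs call(), adds the node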
def transform(images, transforms, fill_mode='reflect', fill_value=0.0,
              interpolation='bilinear', output_shape=None, name=None):
    """Applies the given transform(s) to the image(s).

    Args:
      images: A tensor of shape (num_images, num_rows, num_columns,
        num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
        (num_rows, num_columns) (HW). The rank must be statically known (the
        shape is not `TensorShape(None)`).
      transforms: Projective transform matrix/matrices. A vector of length 8 or
        tensor of size N x 8. If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
        `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
        transform mapping input points to output points. Note that gradients
        are not backpropagated into transformation parameters.
      fill_mode: Points outside the boundaries of the input are filled
        according to the given mode (one of `{'constant', 'reflect', 'wrap',
        'nearest'}`).
      fill_value: a float representing the value to be filled outside the
        boundaries when `fill_mode` is "constant".
      interpolation: Interpolation mode. Supported values: "nearest",
        "bilinear".
      output_shape: Output dimension after the transform, [height, width]. If
        None, output is the same size as input image.
      name: The name of the op.

    Fill mode behavior for each valid value is as follows:

    - reflect (d c b a | a b c d | d c b a): the input is extended by
      reflecting about the edge of the last pixel.
    - constant (k k k k | a b c d | k k k k): the input is extended by filling
      all values beyond the edge with the same constant value k = 0.
    - wrap (a b c d | a b c d | a b c d): the input is extended by wrapping
      around to the opposite edge.
    - nearest (a a a a | a b c d | d d d d): the input is extended by the
      nearest pixel.

    Input shape:
      4D tensor with shape: `(samples, height, width, channels)`,
        data_format='channels_last'.
    Output shape:
      4D tensor with shape: `(samples, height, width, channels)`,
        data_format='channels_last'.

    Returns:
      Image(s) with the same type and shape as `images`, with the given
      transform(s) applied. Transformed coordinates outside of the input image
      will be filled with zeros.

    Raises:
      TypeError: If `images` is an invalid type.
      ValueError: If `output_shape` is not a 1-D int32 Tensor.
    """
    with backend.name_scope(name or 'transform'):
        if output_shape is None:
            output_shape = tf.compat.v1.shape(images)[1:3]
            if not tf.executing_eagerly():
                output_shape_value = tf.get_static_value(output_shape)
                if output_shape_value is not None:
                    output_shape = output_shape_value

        output_shape = tf.convert_to_tensor(output_shape, tf.int32,
                                            name='output_shape')

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
                             'new_height, new_width, instead got '
                             '{}'.format(output_shape))

        fill_value = tf.convert_to_tensor(fill_value, tf.float32,
                                          name='fill_value')

        return tf.raw_ops.ImageProjectiveTransformV3(
            images=images, output_shape=output_shape, fill_value=fill_value,
            transforms=transforms, fill_mode=fill_mode.upper(),
            interpolation=interpolation.upper())
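# A hedged usage example for `transform` above. Because each matrix maps
# output points back to input points, a translation that moves content right
# and down uses negative offsets in a2/b2; the values are illustrative only.
import tensorflow as tf

images = tf.random.uniform((4, 64, 64, 3))  # NHWC batch
# Output (x, y) samples input (x - 10, y - 20): shift 10 px right, 20 px down.
translation = [[1.0, 0.0, -10.0,   # a0, a1, a2
                0.0, 1.0, -20.0,   # b0, b1, b2
                0.0, 0.0]]         # c0, c1 (k = 1, i.e. affine)
shifted = transform(images, transforms=translation,
                    fill_mode='constant', fill_value=0.0,
                    interpolation='bilinear')
print(shifted.shape)  # (4, 64, 64, 3)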
def get_weight_static_norm(model): with K.name_scope('w_static_norm'): weights = model.trainable_weights[0:22] w_norm = K.sqrt(sum([K.sum(K.square(w)) for w in weights])) return w_norm
def _ReductionCell(self, filters, prefix, prev, cur):
    with K.name_scope('reduce'):
        prev = self._Fit(filters=filters, target_layer=cur, prefix=prefix,
                         net=prev)
        cur = self._SqueezeChannels(filters=filters, prefix=prefix, x=cur)

        # Derive each comb_iter's prefix from the cell-level prefix, so the
        # sub-prefixes stay flat instead of nesting into one another.
        base_prefix = prefix

        # Combinations computed on the full-resolution inputs
        with K.name_scope('comb_iter_0'):
            prefix = '{}/comb_iter_0'.format(base_prefix)
            add_0 = Add()([
                self._Separable(filters=filters, kernel_size=5, strides=2,
                                prefix='{}/left'.format(prefix), net=cur),
                self._Separable(filters=filters, kernel_size=7, strides=2,
                                prefix='{}/right'.format(prefix), net=prev)
            ])

        with K.name_scope('comb_iter_1'):
            prefix = '{}/comb_iter_1'.format(base_prefix)
            add_1 = Add()([
                MaxPooling2D(3, strides=2, padding='same')(cur),
                self._Separable(filters=filters, kernel_size=7, strides=2,
                                prefix='{}/right'.format(prefix), net=prev)
            ])

        with K.name_scope('comb_iter_2'):
            prefix = '{}/comb_iter_2'.format(base_prefix)
            add_2 = Add()([
                AveragePooling2D(3, strides=2, padding='same')(cur),
                self._Separable(filters=filters, kernel_size=5, strides=2,
                                prefix='{}/right'.format(prefix), net=prev)
            ])

        # Combinations computed on the stride-reduced outputs
        with K.name_scope('comb_iter_3'):
            add_3 = Add()([
                AveragePooling2D(3, strides=1, padding='same')(add_0),
                add_1
            ])

        with K.name_scope('comb_iter_4'):
            prefix = '{}/comb_iter_4'.format(base_prefix)
            add_4 = Add()([
                self._Separable(filters=filters, kernel_size=3, strides=1,
                                prefix='{}/left'.format(prefix), net=add_0),
                MaxPooling2D(3, strides=2, padding='same')(cur)
            ])

        return Concatenate(axis=-1)([add_1, add_2, add_3, add_4])
def get_gradient_static_norm(model): with K.name_scope('gradient_static_norm'): grads_static = K.gradients(model.total_loss, model.trainable_weights[0:22]) norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads_static])) return norm
def __init__(self, optimizer, gdev_list=None): self.optimizer = optimizer self._gdev_list = gdev_list with K.name_scope(self.__class__.__name__): self.iterations = K.variable(0, dtype='int64', name='iterations')
def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper)

    # Arguments
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id

    # Returns
        a Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('reduction_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id)

        h = Activation('relu')(ip)
        h = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                   name='reduction_conv_1_%s' % id, use_bias=False,
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay))(h)
        h = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON,
                               name='reduction_bn_1_%s' % id)(h)

        with K.name_scope('block_1'):
            x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_left1_%s' % id)
            x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_1_%s' % id)
            x1 = add([x1_1, x1_2], name='reduction_add_1_%s' % id)

        with K.name_scope('block_2'):
            x2_1 = MaxPooling2D((3, 3), strides=(2, 2), padding='same',
                                name='reduction_left2_%s' % id)(h)
            x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_right2_%s' % id)
            x2 = add([x2_1, x2_2], name='reduction_add_2_%s' % id)

        with K.name_scope('block_3'):
            x3_1 = AveragePooling2D((3, 3), strides=(2, 2), padding='same',
                                    name='reduction_left3_%s' % id)(h)
            x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_right3_%s' % id)
            x3 = add([x3_1, x3_2], name='reduction_add3_%s' % id)

        with K.name_scope('block_4'):
            x4 = AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                  name='reduction_left4_%s' % id)(x1)
            x4 = add([x2, x4])

        with K.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1, filters, (3, 3),
                                         weight_decay=weight_decay,
                                         id='reduction_left4_%s' % id)
            x5_2 = MaxPooling2D((3, 3), strides=(2, 2), padding='same',
                                name='reduction_right5_%s' % id)(h)
            x5 = add([x5_1, x5_2], name='reduction_add4_%s' % id)

        x = concatenate([x2, x3, x4, x5], axis=channel_dim,
                        name='reduction_concat_%s' % id)
        return x, ip
def __create_dense_net(nb_classes, img_input, include_top, depth=40,
                       nb_dense_block=3, growth_rate=12, nb_filter=-1,
                       nb_layers_per_block=-1, bottleneck=False, reduction=0.0,
                       dropout_rate=None, weight_decay=1e-4,
                       subsample_initial_block=False, pooling=None,
                       activation='softmax', transition_pooling='avg'):
    ''' Build the DenseNet model

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns,
            channels)
        include_top: flag to include the final Dense layer
        depth: number of layers
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. Default -1 indicates initial
            number of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
            Can be -1, a positive integer or a list.
            If -1, calculates nb_layer_per_block from the depth of the network.
            If positive integer, a set number of layers per dense block.
            If list, nb_layer is used as provided. Note that list size must
            be nb_dense_block
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks. Note: reduction value
            is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Changes model type to suit different datasets.
            Should be set to True for ImageNet, and False for CIFAR datasets.
            When set to True, the initial convolution will be strided and a
            MaxPooling2D is added before the initial dense block.
        pooling: Optional pooling mode for feature extraction when `include_top`
            is `False`.
            - `None` means that the output of the model will be the 4D tensor
                output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to the
                output of the last convolutional layer, and thus the output of
                the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        activation: Type of activation at the top layer. Can be one of 'softmax'
            or 'sigmoid'. Note that if sigmoid is used, classes must be 1.
        transition_pooling: `avg` for avg pooling (default), `max` for max
            pooling, None for no pooling during scale transition blocks. Please
            note that this default differs from the DenseNetFCN paper in
            accordance with the DenseNet paper.
    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`
            or `nb_dense_block`
    '''
    with K.name_scope('DenseNet'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # layers in each dense block
        if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != nb_dense_block:
                raise ValueError('If `nb_layers_per_block` is a list, its '
                                 'length must equal `nb_dense_block`.')

            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (depth - 4) % 3 == 0, \
                    'Depth must be 3N + 4 if nb_layers_per_block == -1'
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept the user's initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = (7, 7)
            initial_strides = (2, 2)
        else:
            initial_kernel = (3, 3)
            initial_strides = (1, 1)

        x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal',
                   padding='same', name='initial_conv2D',
                   strides=initial_strides, use_bias=False,
                   kernel_regularizer=l2(weight_decay))(img_input)

        if subsample_initial_block:
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                                   name='initial_bn')(x)
            x = Activation('relu')(x)
            x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter,
                                         growth_rate, bottleneck=bottleneck,
                                         dropout_rate=dropout_rate,
                                         weight_decay=weight_decay,
                                         block_prefix='dense_%i' % block_idx)
            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression,
                                   weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx,
                                   transition_pooling=transition_pooling)
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_block
        x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate,
                                     bottleneck=bottleneck,
                                     dropout_rate=dropout_rate,
                                     weight_decay=weight_decay,
                                     block_prefix='dense_%i' % (nb_dense_block - 1))

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                               name='final_bn')(x)
        x = Activation('relu')(x)

        if include_top:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)
            x = Dense(nb_classes, activation=activation)(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        return x
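# A hedged sketch of how the builder above is typically wired into a `Model`,
# assuming the module's `Input`/`Model` imports: a DenseNet-40 (k = 12)
# classifier for 32x32 inputs, i.e. the CIFAR-style configuration with
# subsample_initial_block=False.
img_input = Input(shape=(32, 32, 3))
x = __create_dense_net(nb_classes=10, img_input=img_input, include_top=True,
                       depth=40, nb_dense_block=3, growth_rate=12,
                       nb_filter=-1, nb_layers_per_block=-1,
                       subsample_initial_block=False, pooling='avg',
                       activation='softmax')
model = Model(img_input, x, name='densenet_40_k12')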
def dense_bn(x, units, use_bias=True, scope=None, activation=None): with K.name_scope(scope): x = Dense(units=units, use_bias=use_bias)(x) x = BatchNormalization(momentum=0.9)(x) x = Activation(activation)(x) return x
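# A tiny hedged example of the `dense_bn` helper above, stacking two blocks
# into a small classification head; the scope names are illustrative.
inp = Input(shape=(128,))
h = dense_bn(inp, units=64, scope='fc1', activation='relu')
out = dense_bn(h, units=10, scope='fc2', activation='softmax')
model = Model(inp, out)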
def __create_fcn_dense_net(nb_classes, img_input, include_top,
                           nb_dense_block=5, growth_rate=12, reduction=0.0,
                           dropout_rate=None, weight_decay=1e-4,
                           nb_layers_per_block=4, nb_upsampling_conv=128,
                           upsampling_type='deconv', init_conv_filters=48,
                           input_shape=None, activation='softmax',
                           early_transition=False, transition_pooling='max',
                           initial_kernel_size=(3, 3)):
    ''' Build the DenseNet-FCN model

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns,
            channels)
        include_top: flag to include the final Dense layer
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        reduction: reduction factor of transition blocks. Note: reduction value
            is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay
        nb_layers_per_block: number of layers in each dense block.
            Can be a positive integer or a list.
            If positive integer, a set number of layers per dense block.
            If list, nb_layer is used as provided. Note that list size must
            be (nb_dense_block + 1)
        nb_upsampling_conv: number of filters for the convolution used in
            upsampling via subpixel convolution
        upsampling_type: Can be one of 'upsampling', 'deconv' and 'subpixel'.
            Defines the type of upsampling algorithm used.
        init_conv_filters: number of filters in the initial convolution
        input_shape: Only used for shape inference in fully convolutional
            networks.
        activation: Type of activation at the top layer. Can be one of 'softmax'
            or 'sigmoid'. Note that if sigmoid is used, classes must be 1.
        early_transition: Start with an extra initial transition down and end
            with an extra transition up to reduce the network size.
        transition_pooling: 'max' for max pooling (default), 'avg' for average
            pooling, None for no pooling. Please note that this default differs
            from the DenseNet paper in accordance with the DenseNetFCN paper.
        initial_kernel_size: The first Conv2D kernel might vary in size based
            on the application, this parameter makes it configurable.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`,
            `nb_dense_block` or `nb_upsampling_conv`.
    '''
    with K.name_scope('DenseNetFCN'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if concat_axis == 1:  # channels_first dim ordering
            _, rows, cols = input_shape
        else:
            rows, cols, _ = input_shape

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # check if upsampling_conv has the minimum number of filters; the
        # minimum is set to 12, as at least 3 color channels are needed for
        # correct upsampling
        if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0):
            raise ValueError('Parameter `nb_upsampling_conv` number of channels '
                             'must be a positive number divisible by 4 and '
                             'greater than 12')

        # layers in each dense block
        if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != (nb_dense_block + 1):
                raise ValueError('If `nb_layers_per_block` is a list, its '
                                 'length must be (`nb_dense_block` + 1)')

            bottleneck_nb_layers = nb_layers[-1]
            rev_layers = nb_layers[::-1]
            nb_layers.extend(rev_layers[1:])
        else:
            bottleneck_nb_layers = nb_layers_per_block
            nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1)

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        x = Conv2D(init_conv_filters, initial_kernel_size,
                   kernel_initializer='he_normal', padding='same',
                   name='initial_conv2D', use_bias=False,
                   kernel_regularizer=l2(weight_decay))(img_input)
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                               name='initial_bn')(x)
        x = Activation('relu')(x)

        nb_filter = init_conv_filters

        skip_list = []

        if early_transition:
            x = __transition_block(x, nb_filter, compression=compression,
                                   weight_decay=weight_decay,
                                   block_prefix='tr_early',
                                   transition_pooling=transition_pooling)

        # Add dense blocks and transition down blocks
        for block_idx in range(nb_dense_block):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter,
                                         growth_rate,
                                         dropout_rate=dropout_rate,
                                         weight_decay=weight_decay,
                                         block_prefix='dense_%i' % block_idx)

            # Skip connection
            skip_list.append(x)

            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression,
                                   weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx,
                                   transition_pooling=transition_pooling)

            # this is calculated inside transition_down_block
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_down_block;
        # return the concatenated feature maps without the concatenation
        # of the input
        _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers,
                                                  nb_filter, growth_rate,
                                                  dropout_rate=dropout_rate,
                                                  weight_decay=weight_decay,
                                                  return_concat_list=True,
                                                  block_prefix='dense_%i' %
                                                               nb_dense_block)

        skip_list = skip_list[::-1]  # reverse the skip list

        # Add dense blocks and transition up blocks
        for block_idx in range(nb_dense_block):
            n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx]

            # The upsampling block must upsample only the feature maps
            # (concat_list[1:]), not the concatenation of the input with the
            # feature maps (concat_list[0]).
            l = concatenate(concat_list[1:], axis=concat_axis)

            t = __transition_up_block(l, nb_filters=n_filters_keep,
                                      type=upsampling_type,
                                      weight_decay=weight_decay,
                                      block_prefix='tr_up_%i' % block_idx)

            # concatenate the skip connection with the transition block
            x = concatenate([t, skip_list[block_idx]], axis=concat_axis)

            # Don't allow the feature map size to grow in upsampling dense
            # blocks
            x_up, nb_filter, concat_list = __dense_block(
                x, nb_layers[nb_dense_block + block_idx + 1],
                nb_filter=growth_rate, growth_rate=growth_rate,
                dropout_rate=dropout_rate, weight_decay=weight_decay,
                return_concat_list=True, grow_nb_filters=False,
                block_prefix='dense_%i' % (nb_dense_block + 1 + block_idx))

        if early_transition:
            x_up = __transition_up_block(x_up, nb_filters=nb_filter,
                                         type=upsampling_type,
                                         weight_decay=weight_decay,
                                         block_prefix='tr_up_early')

        if include_top:
            x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same',
                       use_bias=False)(x_up)

            if K.image_data_format() == 'channels_first':
                channel, row, col = input_shape
            else:
                row, col, channel = input_shape

            x = Reshape((row * col, nb_classes))(x)
            x = Activation(activation)(x)
            x = Reshape((row, col, nb_classes))(x)
        else:
            x = x_up

        return x
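# A hedged sketch of the segmentation counterpart above; `input_shape` must be
# passed explicitly because the final Reshape layers need static spatial
# dimensions. The class count and image size are illustrative.
input_shape = (224, 224, 3)
img_input = Input(shape=input_shape)
x = __create_fcn_dense_net(nb_classes=5, img_input=img_input, include_top=True,
                           nb_dense_block=5, growth_rate=12,
                           nb_layers_per_block=4, upsampling_type='deconv',
                           init_conv_filters=48, input_shape=input_shape,
                           activation='softmax')
model = Model(img_input, x, name='fcn_densenet')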
def build(self, input_shapes): vdim = input_shapes[0][2] edim = input_shapes[1][2] udim = input_shapes[2][2] with kb.name_scope(self.name): with kb.name_scope('phi_v'): v_shapes = [self.units_e[-1] + vdim + udim] + self.units_v v_shapes = list(zip(v_shapes[:-1], v_shapes[1:])) self.phi_v_weights = [ self.add_weight(shape=i, initializer=self.kernel_initializer, name='weight_v_%d' % j, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) for j, i in enumerate(v_shapes) ] if self.use_bias: self.phi_v_biases = [ self.add_weight(shape=(i[-1], ), initializer=self.bias_initializer, name='bias_v_%d' % j, regularizer=self.bias_regularizer, constraint=self.bias_constraint) for j, i in enumerate(v_shapes) ] else: self.phi_v_biases = None with kb.name_scope('phi_e'): e_shapes = [2 * vdim + edim + udim] + self.units_e e_shapes = list(zip(e_shapes[:-1], e_shapes[1:])) self.phi_e_weights = [ self.add_weight(shape=i, initializer=self.kernel_initializer, name='weight_e_%d' % j, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) for j, i in enumerate(e_shapes) ] if self.use_bias: self.phi_e_biases = [ self.add_weight(shape=(i[-1], ), initializer=self.bias_initializer, name='bias_e_%d' % j, regularizer=self.bias_regularizer, constraint=self.bias_constraint) for j, i in enumerate(e_shapes) ] else: self.phi_e_biases = None with kb.name_scope('phi_u'): u_shapes = [self.units_e[-1] + self.units_v[-1] + udim ] + self.units_u u_shapes = list(zip(u_shapes[:-1], u_shapes[1:])) self.phi_u_weights = [ self.add_weight(shape=i, initializer=self.kernel_initializer, name='weight_u_%d' % j, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) for j, i in enumerate(u_shapes) ] if self.use_bias: self.phi_u_biases = [ self.add_weight(shape=(i[-1], ), initializer=self.bias_initializer, name='bias_u_%d' % j, regularizer=self.bias_regularizer, constraint=self.bias_constraint) for j, i in enumerate(u_shapes) ] else: self.phi_u_biases = None self.built = True
def build(self):
    inputs = Input(batch_shape=self.input_tensor)

    prev, cur = self.stem(filters=self.filters,
                          stem_filters=self.stem_filters, net=inputs)

    for repeat in range(self.num_reduction_cells + 1):
        if repeat == self.num_reduction_cells and self.add_aux_output:
            prefix = 'aux_{}'.format(repeat * self.num_cell_repeats - 1)
            aux_outputs = self._AuxiliaryTop(classes=self.num_classes,
                                             prefix=prefix, net=cur)

        if repeat > 0:
            self.filters *= 2
            prev, cur = cur, prev
            cur = self._ReductionCell(
                filters=self.filters,
                prefix='reduction_cell_{}'.format(repeat - 1),
                cur=prev, prev=cur)

        for cell_index in range(self.num_cell_repeats):
            prev, cur = cur, prev
            cur = self._NormalCell(
                filters=self.filters,
                prefix='cell_{}'.format(cell_index +
                                        repeat * self.num_cell_repeats),
                cur=prev, prev=cur)

    with K.name_scope('final_layer'):
        x = Activation('relu', name='last_relu')(cur)

        if self.include_top:
            x = GlobalAveragePooling2D(name='avg_pool')(x)
            x = Dropout(rate=self.dropout_rate)(x)
            outputs = Dense(self.num_classes, activation='softmax',
                            name='final_layer/FC')(x)
            model_suffix = 'with_top'
        else:
            if self.pooling == 'avg':
                outputs = GlobalAveragePooling2D(name='avg_pool')(x)
            elif self.pooling == 'max':
                outputs = GlobalMaxPooling2D(name='max_pool')(x)
            else:
                raise ValueError('Unsupported pooling option {!r}; supported '
                                 'options are `avg` and `max`.'
                                 .format(self.pooling))
            model_suffix = 'no_top'

    model_name = 'NASNet-A_{}@{}_{}_{}'.format(self.num_cell_repeats,
                                               self.penultimate_filters,
                                               self.num_classes, model_suffix)

    if self.add_aux_output:
        model = Model(inputs, [outputs, aux_outputs],
                      name='{}_with_auxiliary_output'.format(model_name))
    else:
        model = Model(inputs, outputs, name=model_name)

    model.summary()
    return model
def _adjust_block(p, ip, filters, block_id=None):
    '''Adjusts the input `previous path` to match the shape of the `input`.

    Used in situations where the output number of filters needs to be changed.

    Note: `weight_decay`, `use_bn`, `bn_momentum` and `drop_p` are free
    variables here, read from module-level configuration rather than passed
    as arguments.

    # Arguments
        p: Input tensor which needs to be modified
        ip: Input tensor whose shape needs to be matched
        filters: Number of output filters to be matched
        block_id: String block_id

    # Returns
        Adjusted Keras tensor
    '''
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    img_dim = 2 if backend.image_data_format() == 'channels_first' else -2

    ip_shape = backend.int_shape(ip)

    if p is not None:
        p_shape = backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation('relu',
                                      name='adjust_relu_1_%s' % block_id)(p)

                p1 = layers.AveragePooling2D(
                    (1, 1), strides=(2, 2), padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)(p)
                p1 = layers.Conv2D(filters // 2, (1, 1), padding='same',
                                   kernel_regularizer=l2(weight_decay),
                                   use_bias=False,
                                   name='adjust_conv_1_%s' % block_id,
                                   kernel_initializer='he_normal')(p1)

                p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = layers.AveragePooling2D(
                    (1, 1), strides=(2, 2), padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)(p2)
                p2 = layers.Conv2D(filters // 2, (1, 1), padding='same',
                                   kernel_regularizer=l2(weight_decay),
                                   use_bias=False,
                                   name='adjust_conv_2_%s' % block_id,
                                   kernel_initializer='he_normal')(p2)

                p = layers.concatenate([p1, p2], axis=channel_dim)
                if use_bn:
                    p = layers.BatchNormalization(
                        axis=channel_dim, momentum=bn_momentum, epsilon=1e-3,
                        name='adjust_bn_%s' % block_id)(p)

                p = layers.SpatialDropout2D(drop_p)(p)

        elif p_shape[channel_dim] != filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv2D(filters, (1, 1), strides=(1, 1),
                                  kernel_regularizer=l2(weight_decay),
                                  padding='same',
                                  name='adjust_conv_projection_%s' % block_id,
                                  use_bias=False,
                                  kernel_initializer='he_normal')(p)
                if use_bn:
                    p = layers.BatchNormalization(
                        axis=channel_dim, momentum=bn_momentum, epsilon=1e-3,
                        name='adjust_bn_%s' % block_id)(p)

                p = layers.SpatialDropout2D(drop_p)(p)

    return p