def build_model(self, inputs, training=True, reuse=False):

    with var_storage.model_variable_scope(
            self.model_hparams.model_name,
            reuse=reuse,
            dtype=self.model_hparams.dtype):

        with tf.variable_scope("input_reshape"):
            if self.model_hparams.input_format == 'NHWC' and self.model_hparams.compute_format == 'NCHW':
                # Reshape inputs: NHWC => NCHW
                inputs = tf.transpose(inputs, [0, 3, 1, 2])

            elif self.model_hparams.input_format == 'NCHW' and self.model_hparams.compute_format == 'NHWC':
                # Reshape inputs: NCHW => NHWC
                inputs = tf.transpose(inputs, [0, 2, 3, 1])

        if self.model_hparams.dtype != inputs.dtype:
            inputs = tf.cast(inputs, self.model_hparams.dtype)

        net = blocks.conv2d_block(
            inputs,
            n_channels=64,
            kernel_size=(7, 7),
            strides=(2, 2),
            mode='SAME',
            use_batch_norm=True,
            activation='relu',
            is_training=training,
            data_format=self.model_hparams.compute_format,
            conv2d_hparams=self.conv2d_hparams,
            batch_norm_hparams=self.batch_norm_hparams,
            name='conv2d')

        net = layers.max_pooling2d(
            net,
            pool_size=(3, 3),
            strides=(2, 2),
            padding='SAME',
            data_format=self.model_hparams.compute_format,
            name="max_pooling2d")

        model_bottlenecks = self.model_hparams.layers_depth
        for block_id, block_bottleneck in enumerate(model_bottlenecks):
            for layer_id in range(self.model_hparams.layers_count[block_id]):
                stride = 2 if (layer_id == 0 and block_id != 0) else 1

                net = blocks.bottleneck_block(
                    inputs=net,
                    depth=block_bottleneck * self.model_hparams.expansions,
                    depth_bottleneck=block_bottleneck,
                    cardinality=self.model_hparams.cardinality,
                    stride=stride,
                    training=training,
                    data_format=self.model_hparams.compute_format,
                    conv2d_hparams=self.conv2d_hparams,
                    batch_norm_hparams=self.batch_norm_hparams,
                    block_name="btlnck_block_%d_%d" % (block_id, layer_id),
                    use_se=self.model_hparams.use_se,
                    ratio=self.model_hparams.se_ratio)

        with tf.variable_scope("output"):
            net = layers.reduce_mean(
                net,
                keepdims=False,
                data_format=self.model_hparams.compute_format,
                name='spatial_mean')

            logits = layers.dense(
                inputs=net,
                units=self.model_hparams.n_classes,
                use_bias=True,
                trainable=training,
                kernel_initializer=self.dense_hparams.kernel_initializer,
                bias_initializer=self.dense_hparams.bias_initializer)

            if logits.dtype != tf.float32:
                logits = tf.cast(logits, tf.float32)

            probs = layers.softmax(logits, name="softmax", axis=1)

        return probs, logits
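# `blocks.conv2d_block` is defined elsewhere in the repo. As a rough sketch of
# the conv -> batch-norm -> activation pattern it implements (TF1.x API), with
# the hparams plumbing and the 'SAME_RESNET' explicit-padding mode omitted.
# The names and defaults below are assumptions for illustration, not the
# repo's actual helper:
import tensorflow as tf


def conv2d_block_sketch(inputs, n_channels, kernel_size, strides,
                        use_batch_norm=True, activation='relu',
                        is_training=True, data_format='NCHW', name='conv2d'):
    with tf.variable_scope(name):
        net = tf.layers.conv2d(
            inputs,
            filters=n_channels,
            kernel_size=kernel_size,
            strides=strides,
            padding='same',
            data_format='channels_first' if data_format == 'NCHW' else 'channels_last',
            use_bias=not use_batch_norm)  # bias is redundant before batch norm
        if use_batch_norm:
            net = tf.layers.batch_normalization(
                net,
                axis=1 if data_format == 'NCHW' else -1,
                training=is_training)
        if activation == 'relu':
            net = tf.nn.relu(net)
        return net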
def _build(self, input: List[tf.keras.layers.Input]):
    """Creates an EfficientNet v1 graph given the model parameters.

    This function is wrapped by the `EfficientNet_v1` class to make a
    tf.keras.Model.

    Args:
        input: the input batch of images

    Returns:
        the output of efficientnet v1
    """
    config = self.config
    depth_coefficient = config.mparams.depth_coefficient
    blocks = config.mparams.blocks
    stem_base_filters = config.mparams.stem_base_filters
    top_base_filters = config.mparams.top_base_filters
    activation = get_activation(config.mparams.activation)
    dropout_rate = config.mparams.dropout_rate
    drop_connect_rate = config.mparams.drop_connect_rate
    num_classes = config.mparams.num_classes
    input_channels = config.mparams.input_channels
    rescale_input = config.mparams.rescale_input
    data_format = tf.keras.backend.image_data_format()
    dtype = config.mparams.dtype
    weight_decay = config.weight_decay
    weight_init = config.mparams.weight_init
    train_batch_size = config.train_batch_size
    do_mixup = config.mixup_alpha > 0
    do_cutmix = config.cutmix_alpha > 0

    def cond_mixing(args):
        images, mixup_weights, cutmix_masks, is_tr_split = args
        return tf.cond(tf.keras.backend.equal(is_tr_split[0], 0),
                       lambda: images,  # eval phase
                       lambda: mixing_lite(images, mixup_weights, cutmix_masks,
                                           train_batch_size, do_mixup, do_cutmix))  # training phase

    images = input[0]
    x = images
    if len(input) > 1:
        # We only get here in the train or train_and_eval modes.
        if self.config.defer_img_mixing:
            # We only get here if image mixing was deferred from the data
            # loader; mixing on device further accelerates training.
            mixup_weights = input[1]
            cutmix_masks = input[2]
            is_tr_split = input[3]
            x = tf.keras.layers.Lambda(cond_mixing)([images, mixup_weights, cutmix_masks, is_tr_split])

    # The data loader outputs data in the channels-last format.
    if data_format == 'channels_first':
        # Happens on GPU/TPU if available.
        x = tf.keras.layers.Permute((3, 1, 2))(x)

    if rescale_input:
        # x = (x - mean) / std
        x = preprocessing.normalize_images(x,
                                           mean_rgb=config.mparams.mean_rgb,
                                           stddev_rgb=config.mparams.std_rgb,
                                           num_channels=input_channels,
                                           dtype=dtype,
                                           data_format=data_format)

    # Build stem
    x = conv2d_block(x,
                     round_filters(stem_base_filters, config),
                     config,
                     kernel_size=[3, 3],
                     strides=[2, 2],
                     activation=activation,
                     name='stem')

    # Build blocks
    num_blocks_total = sum(
        round_repeats(block['num_repeat'], depth_coefficient) for block in blocks)
    block_num = 0

    for stack_idx, block in enumerate(blocks):
        assert block['num_repeat'] > 0
        # Update block input and output filters based on the depth multiplier.
        block.update({
            'input_filters': round_filters(block['input_filters'], config),
            'output_filters': round_filters(block['output_filters'], config),
            'num_repeat': round_repeats(block['num_repeat'], depth_coefficient)})

        # The first block needs to take care of the stride and filter-size increase.
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        config.mparams.update({'drop_connect_rate': drop_rate})  # TODO(Sugh) replace
        block_prefix = 'stack_{}/block_0/'.format(stack_idx)
        x = mb_conv_block(x, block, config, block_prefix)
        block_num += 1

        if block['num_repeat'] > 1:
            block.update({
                'input_filters': block['output_filters'],
                'strides': (1, 1)})

            for block_idx in range(block['num_repeat'] - 1):
                drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
                config.mparams.update({'drop_connect_rate': drop_rate})
                block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
                x = mb_conv_block(x, block, config, prefix=block_prefix)
                block_num += 1

    # Build top
    x = conv2d_block(x,
                     round_filters(top_base_filters, config),
                     config,
                     activation=activation,
                     name='top')

    # Build classifier
    DENSE_KERNEL_INITIALIZER['config']['mode'] = weight_init
    x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
    if dropout_rate and dropout_rate > 0:
        x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
    x = tf.keras.layers.Dense(
        num_classes,
        kernel_initializer=DENSE_KERNEL_INITIALIZER,
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
        bias_regularizer=tf.keras.regularizers.l2(weight_decay),
        name='logits')(x)
    x = tf.keras.layers.Activation('softmax', name='probs', dtype=tf.float32)(x)

    return x
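# `round_filters` and `round_repeats` implement the compound-scaling rounding
# from the reference EfficientNet code. A sketch of the canonical versions;
# reading the coefficients from `config.mparams` is an assumption about this
# repo's config layout:
import math


def round_filters_sketch(filters, config):
    """Scale a channel count by width_coefficient and round it to a multiple
    of depth_divisor, never dropping more than 10% below the scaled value."""
    width_coefficient = config.mparams.width_coefficient  # assumed field name
    divisor = config.mparams.depth_divisor                # assumed field name
    filters *= width_coefficient
    new_filters = int(filters + divisor / 2) // divisor * divisor
    new_filters = max(divisor, new_filters)
    if new_filters < 0.9 * filters:
        new_filters += divisor
    return int(new_filters)


def round_repeats_sketch(repeats, depth_coefficient):
    """Scale a per-stack repeat count by depth_coefficient, rounding up."""
    return int(math.ceil(depth_coefficient * repeats))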
def mb_conv_block(inputs: tf.Tensor, block: dict, config: dict, prefix: Text = None):
    """Mobile Inverted Residual Bottleneck.

    Args:
        inputs: the Keras input to the block
        block: BlockConfig, arguments to create a Block
        config: ModelConfig, a set of model parameters
        prefix: prefix for naming all layers

    Returns:
        the output of the block
    """
    use_se = config.mparams.use_se if 'use_se' in config.mparams else block['se_ratio'] is not None
    activation = get_activation(config.mparams.activation)
    drop_connect_rate = config.mparams.drop_connect_rate
    data_format = tf.keras.backend.image_data_format()
    use_depthwise = block['conv_type'] != 'no_depthwise'
    prefix = prefix or ''

    filters = block['input_filters'] * block['expand_ratio']

    x = inputs

    if block['fused_conv']:
        # If we use fused MBConv, skip the expansion and use a regular conv.
        x = conv2d_block(x,
                         filters,
                         config,
                         kernel_size=block['kernel_size'],
                         strides=block['strides'],
                         activation=activation,
                         name=prefix + 'fused')
    else:
        if block['expand_ratio'] != 1:
            # Expansion phase
            kernel_size = (1, 1) if use_depthwise else (3, 3)
            x = conv2d_block(x,
                             filters,
                             config,
                             kernel_size=kernel_size,
                             activation=activation,
                             name=prefix + 'expand')

        # Depthwise convolution
        if use_depthwise:
            x = conv2d_block(x,
                             conv_filters=None,
                             config=config,
                             kernel_size=block['kernel_size'],
                             strides=block['strides'],
                             activation=activation,
                             depthwise=True,
                             name=prefix + 'depthwise')

    # Squeeze-and-excitation phase
    if use_se:
        assert block['se_ratio'] is not None
        assert 0 < block['se_ratio'] <= 1
        num_reduced_filters = max(1, int(block['input_filters'] * block['se_ratio']))

        if data_format == 'channels_first':
            se_shape = (filters, 1, 1)
        else:
            se_shape = (1, 1, filters)

        se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
        se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)

        se = conv2d_block(se,
                          num_reduced_filters,
                          config,
                          use_bias=True,
                          use_batch_norm=False,
                          activation=activation,
                          name=prefix + 'se_reduce')
        se = conv2d_block(se,
                          filters,
                          config,
                          use_bias=True,
                          use_batch_norm=False,
                          activation='sigmoid',
                          name=prefix + 'se_expand')
        x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')

    # Output phase
    x = conv2d_block(x,
                     block['output_filters'],
                     config,
                     activation=None,
                     name=prefix + 'project')

    # Add an identity activation so that quantization-aware training can
    # insert quantization ops correctly.
    x = tf.keras.layers.Activation(get_activation('identity'), name=prefix + 'id')(x)

    if (block['id_skip']
            and all(s == 1 for s in block['strides'])
            and block['input_filters'] == block['output_filters']):
        if drop_connect_rate and drop_connect_rate > 0:
            # Apply drop connect. The only difference between dropout and drop
            # connect in TF is scaling by drop_connect_rate during training. See:
            # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
            x = tf.keras.layers.Dropout(drop_connect_rate,
                                        noise_shape=(None, 1, 1, 1),
                                        name=prefix + 'drop')(x)

        x = tf.keras.layers.add([x, inputs], name=prefix + 'add')

    return x
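# The keys `mb_conv_block` reads from `block` map directly onto the MBConv
# hyper-parameters. A hypothetical stage config for illustration; the real
# values come from the architecture's block definitions:
example_block = {
    'input_filters': 16,
    'output_filters': 24,
    'expand_ratio': 6,          # expansion width = input_filters * expand_ratio
    'kernel_size': (3, 3),      # depthwise kernel
    'strides': (2, 2),          # any stride > 1 disables the residual add
    'se_ratio': 0.25,           # None disables squeeze-and-excitation
    'num_repeat': 2,
    'id_skip': True,            # allow the residual add when shapes match
    'fused_conv': False,        # True: one regular conv replaces expand + depthwise
    'conv_type': 'depthwise',   # 'no_depthwise' skips the depthwise conv
}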
def build_model(self, inputs, training=True, reuse=False):

    with var_storage.model_variable_scope(self.model_hparams.model_name,
                                          reuse=reuse,
                                          dtype=self.model_hparams.dtype):

        with tf.variable_scope("input_reshape"):
            if self.model_hparams.input_format == 'NHWC' and self.model_hparams.compute_format == 'NCHW':
                # Reshape inputs: NHWC => NCHW
                inputs = tf.transpose(inputs, [0, 3, 1, 2])

            elif self.model_hparams.input_format == 'NCHW' and self.model_hparams.compute_format == 'NHWC':
                # Reshape inputs: NCHW => NHWC
                inputs = tf.transpose(inputs, [0, 2, 3, 1])

        if self.model_hparams.dtype != inputs.dtype:
            inputs = tf.cast(inputs, self.model_hparams.dtype)

        net = blocks.conv2d_block(
            inputs,
            n_channels=64,  # n_channels=16,
            kernel_size=(7, 7),
            strides=(2, 2),
            mode='SAME_RESNET',
            use_batch_norm=True,
            activation='relu',
            is_training=training,
            data_format=self.model_hparams.compute_format,
            conv2d_hparams=self.conv2d_hparams,
            batch_norm_hparams=self.batch_norm_hparams,
            name='conv2d')

        net = layers.max_pooling2d(
            net,
            pool_size=(3, 3),
            strides=(2, 2),
            padding='SAME',
            data_format=self.model_hparams.compute_format,
            name="max_pooling2d")

        for block_id in range(self.model_hparams.layer_counts[0]):
            net = blocks.bottleneck_block(
                inputs=net,
                depth=256,
                depth_bottleneck=64,
                stride=1,
                training=training,
                data_format=self.model_hparams.compute_format,
                conv2d_hparams=self.conv2d_hparams,
                batch_norm_hparams=self.batch_norm_hparams,
                block_name="btlnck_block_1_%d" % (block_id + 1))

        for block_id in range(self.model_hparams.layer_counts[1]):
            stride = 2 if block_id == 0 else 1
            net = blocks.bottleneck_block(
                inputs=net,
                depth=512,
                depth_bottleneck=128,
                stride=stride,
                training=training,
                data_format=self.model_hparams.compute_format,
                conv2d_hparams=self.conv2d_hparams,
                batch_norm_hparams=self.batch_norm_hparams,
                block_name="btlnck_block_2_%d" % (block_id + 1))

        for block_id in range(self.model_hparams.layer_counts[2]):
            stride = 2 if block_id == 0 else 1
            net = blocks.bottleneck_block(
                inputs=net,
                depth=1024,
                depth_bottleneck=256,
                stride=stride,
                training=training,
                data_format=self.model_hparams.compute_format,
                conv2d_hparams=self.conv2d_hparams,
                batch_norm_hparams=self.batch_norm_hparams,
                block_name="btlnck_block_3_%d" % (block_id + 1))

        for block_id in range(self.model_hparams.layer_counts[3]):
            stride = 2 if block_id == 0 else 1
            net = blocks.bottleneck_block(
                inputs=net,
                depth=2048,
                depth_bottleneck=512,
                stride=stride,
                training=training,
                data_format=self.model_hparams.compute_format,
                conv2d_hparams=self.conv2d_hparams,
                batch_norm_hparams=self.batch_norm_hparams,
                block_name="btlnck_block_4_%d" % (block_id + 1))

        with tf.variable_scope("output"):
            net = layers.reduce_mean(
                net,
                keepdims=False,
                data_format=self.model_hparams.compute_format,
                name='spatial_mean')

            logits = layers.dense(
                inputs=net,
                units=self.model_hparams.n_classes,
                use_bias=True,
                trainable=training,
                kernel_initializer=self.dense_hparams.kernel_initializer,
                bias_initializer=self.dense_hparams.bias_initializer)

            if logits.dtype != tf.float32:
                logits = tf.cast(logits, tf.float32, name="logits")

            probs = layers.softmax(logits, name="softmax", axis=1)

        return probs, logits
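# With the stage widths hard-coded above (256/512/1024/2048 output channels,
# 64/128/256/512 bottleneck channels), `layer_counts` selects the ResNet
# variant. The standard counts from He et al., for reference:
RESNET_LAYER_COUNTS = {
    'resnet50': [3, 4, 6, 3],
    'resnet101': [3, 4, 23, 3],
    'resnet152': [3, 8, 36, 3],
}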
def efficientnet(input: List[tf.keras.layers.Input], config: dict):
    """Creates an EfficientNet graph given the model parameters.

    This function is wrapped by the `EfficientNet` class to make a
    tf.keras.Model.

    Args:
        input: the input batch of images
        config: the model config

    Returns:
        the output of efficientnet
    """
    depth_coefficient = config['depth_coefficient']
    blocks = config['blocks']
    stem_base_filters = config['stem_base_filters']
    top_base_filters = config['top_base_filters']
    activation = get_activation(config['activation'])
    dropout_rate = config['dropout_rate']
    drop_connect_rate = config['drop_connect_rate']
    num_classes = config['num_classes']
    input_channels = config['input_channels']
    rescale_input = config['rescale_input']
    data_format = tf.keras.backend.image_data_format()
    dtype = config['dtype']
    weight_decay = config['weight_decay']
    weight_init = config['weight_init']

    # Move the mixup of images to device.
    images = input[0]
    if len(input) > 1:
        mix_weight = input[1]
        x = (images * mix_weight + images[::-1] * (1. - mix_weight))
    else:
        x = images

    if data_format == 'channels_first':
        # Happens on GPU/TPU if available.
        x = tf.keras.layers.Permute((3, 1, 2))(x)
    if rescale_input:
        x = preprocessing.normalize_images(x,
                                           num_channels=input_channels,
                                           dtype=dtype,
                                           data_format=data_format)

    # Build stem
    x = conv2d_block(x,
                     round_filters(stem_base_filters, config),
                     config,
                     kernel_size=[3, 3],
                     strides=[2, 2],
                     activation=activation,
                     name='stem')

    # Build blocks
    num_blocks_total = sum(
        round_repeats(block['num_repeat'], depth_coefficient) for block in blocks)
    block_num = 0

    for stack_idx, block in enumerate(blocks):
        assert block['num_repeat'] > 0
        # Update block input and output filters based on the depth multiplier.
        block.update({
            'input_filters': round_filters(block['input_filters'], config),
            'output_filters': round_filters(block['output_filters'], config),
            'num_repeat': round_repeats(block['num_repeat'], depth_coefficient)})

        # The first block needs to take care of the stride and filter-size increase.
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        config.update({'drop_connect_rate': drop_rate})  # TODO(Sugh) replace
        block_prefix = 'stack_{}/block_0/'.format(stack_idx)
        x = mb_conv_block(x, block, config, block_prefix)
        block_num += 1

        if block['num_repeat'] > 1:
            block.update({
                'input_filters': block['output_filters'],
                'strides': (1, 1)})

            for block_idx in range(block['num_repeat'] - 1):
                drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
                config.update({'drop_connect_rate': drop_rate})
                block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
                x = mb_conv_block(x, block, config, prefix=block_prefix)
                block_num += 1

    # Build top
    x = conv2d_block(x,
                     round_filters(top_base_filters, config),
                     config,
                     activation=activation,
                     name='top')

    # Build classifier
    DENSE_KERNEL_INITIALIZER['config']['mode'] = weight_init
    x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
    if dropout_rate and dropout_rate > 0:
        x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
    x = tf.keras.layers.Dense(
        num_classes,
        kernel_initializer=DENSE_KERNEL_INITIALIZER,
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
        bias_regularizer=tf.keras.regularizers.l2(weight_decay),
        name='logits')(x)
    x = tf.keras.layers.Activation('softmax', name='probs', dtype=tf.float32)(x)

    return x
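# `mix_weight` above blends each image with the batch-reversed batch, i.e.
# mixup applied on device. How the input pipeline samples the weights is not
# shown here; the standard mixup recipe draws them from Beta(alpha, alpha),
# and the labels must be mixed with the same weights. A sketch under those
# assumptions:
import numpy as np


def sample_mix_weight(batch_size, alpha=0.2):
    """Per-image mixup weights, shaped to broadcast over an NHWC image batch."""
    return np.random.beta(alpha, alpha, size=(batch_size, 1, 1, 1)).astype(np.float32)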
def bottleneck_block(inputs,
                     depth,
                     depth_bottleneck,
                     stride,
                     training=True,
                     data_format='NCHW',
                     conv2d_hparams=None,
                     batch_norm_hparams=None,
                     block_name="bottleneck_block"):

    if data_format not in ['NHWC', 'NCHW']:
        raise ValueError(
            "Unknown data format: `%s` (accepted: ['NHWC', 'NCHW'])" % data_format)

    if not isinstance(conv2d_hparams, tf.contrib.training.HParams):
        raise ValueError("The parameter `conv2d_hparams` is not of type `HParams`")

    if not isinstance(batch_norm_hparams, tf.contrib.training.HParams):
        raise ValueError("The parameter `batch_norm_hparams` is not of type `HParams`")

    in_shape = inputs.get_shape()
    in_size = in_shape[1] if data_format == "NCHW" else in_shape[-1]

    with tf.variable_scope(block_name):

        with tf.variable_scope("shortcut"):
            if depth == in_size:
                if stride == 1:
                    shortcut = tf.identity(inputs)
                else:
                    shortcut = layers.average_pooling2d(
                        inputs,
                        pool_size=(1, 1),
                        strides=(stride, stride),
                        padding='valid',
                        data_format='channels_first' if data_format == 'NCHW' else 'channels_last',
                        name="average_pooling2d")
            else:
                shortcut = blocks.conv2d_block(
                    inputs,
                    n_channels=depth,
                    kernel_size=(1, 1),
                    strides=(stride, stride),
                    mode='SAME',
                    use_batch_norm=True,
                    activation=None,  # Applied at the end, after addition with the bottleneck
                    is_training=training,
                    data_format=data_format,
                    conv2d_hparams=conv2d_hparams,
                    batch_norm_hparams=batch_norm_hparams)

        bottleneck = blocks.conv2d_block(
            inputs,
            n_channels=depth_bottleneck,
            kernel_size=(1, 1),
            strides=(1, 1),
            mode='SAME',
            use_batch_norm=True,
            activation='relu',
            is_training=training,
            data_format=data_format,
            conv2d_hparams=conv2d_hparams,
            batch_norm_hparams=batch_norm_hparams,
            name='bottleneck_1')

        bottleneck = blocks.conv2d_block(
            bottleneck,
            n_channels=depth_bottleneck,
            kernel_size=(3, 3),
            strides=(stride, stride),
            mode='SAME_RESNET',
            use_batch_norm=True,
            activation='relu',
            is_training=training,
            data_format=data_format,
            conv2d_hparams=conv2d_hparams,
            batch_norm_hparams=batch_norm_hparams,
            name='bottleneck_2')

        bottleneck = blocks.conv2d_block(
            bottleneck,
            n_channels=depth,
            kernel_size=(1, 1),
            strides=(1, 1),
            mode='SAME',
            use_batch_norm=True,
            activation=None,  # Applied at the end, after addition with the shortcut
            is_training=training,
            data_format=data_format,
            conv2d_hparams=conv2d_hparams,
            batch_norm_hparams=batch_norm_hparams,
            name='bottleneck_3')

        return layers.relu(shortcut + bottleneck, name='relu')
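# A minimal sketch of calling `bottleneck_block` directly (TF1.x). The HParams
# fields below are placeholders; whatever `blocks.conv2d_block` actually reads
# from them must be supplied in practice:
import tensorflow as tf

conv2d_hparams = tf.contrib.training.HParams(kernel_initializer=None)
batch_norm_hparams = tf.contrib.training.HParams(decay=0.9, epsilon=1e-5)

inputs = tf.placeholder(tf.float32, [None, 64, 56, 56])  # NCHW batch
net = bottleneck_block(inputs,
                       depth=256,
                       depth_bottleneck=64,
                       stride=1,
                       training=True,
                       data_format='NCHW',
                       conv2d_hparams=conv2d_hparams,
                       batch_norm_hparams=batch_norm_hparams)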