def __small_images_branch(self, conv3_1: tf.Tensor, input_size: Size,
                          is_training: bool = True) -> tf.Tensor:
    conv3_1_sub4 = downsample_bilinear(x=conv3_1, shrink_factor=2)
    conv_5_3 = self.__residual_encoder_chain(
        x=conv3_1_sub4,
        encoders_configs=ICNetBackbone.__SMALL_BRANCH_ENCODERS_CONFIGS,
        is_training=is_training)
    # (pool_size, stride) pairs for the pyramid pooling levels.
    pyramid_pooling_config = [
        ((32, 64), (32, 64)),
        ((16, 32), (16, 32)),
        ((13, 25), (10, 20)),
        ((8, 16), (5, 10))
    ]
    pooling_output_size = input_size[0] // 32, input_size[1] // 32
    conv_5_3_sum = pyramid_pooling(
        x=conv_5_3, pooling_config=pyramid_pooling_config,
        output_size=pooling_output_size)
    conv5_4_k1 = bottleneck_conv2d(
        x=conv_5_3_sum, num_filters=256, name='conv5_4_k1')
    conv5_4_k1_bn = tf.layers.batch_normalization(
        inputs=conv5_4_k1, training=is_training, name='conv5_4_k1_bn')
    conv5_4_interp = upsample_bilinear(x=conv5_4_k1_bn, zoom_factor=2)
    conv_sub4 = atrous_conv2d(
        x=conv5_4_interp, num_filters=128, kernel_size=(3, 3),
        name='conv_sub4')
    return tf.layers.batch_normalization(
        inputs=conv_sub4, training=is_training, name='conv_sub4_bn')
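# The `downsample_bilinear` / `upsample_bilinear` helpers used above are
# assumed to wrap TF1's bilinear resize op; a minimal sketch under that
# assumption (not necessarily the repository's exact implementation; static
# input shapes are assumed to be known):


def downsample_bilinear_sketch(x: tf.Tensor, shrink_factor: int) -> tf.Tensor:
    # Shrink both spatial dimensions by an integer factor.
    new_size = (int(x.shape[1]) // shrink_factor,
                int(x.shape[2]) // shrink_factor)
    return tf.image.resize_bilinear(x, size=new_size)


def upsample_bilinear_sketch(x: tf.Tensor, zoom_factor: int) -> tf.Tensor:
    # Grow both spatial dimensions by an integer factor.
    new_size = (int(x.shape[1]) * zoom_factor,
                int(x.shape[2]) * zoom_factor)
    return tf.image.resize_bilinear(x, size=new_size)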
def feed_forward(self, x: tf.Tensor, is_training: bool = True,
                 nodes_to_return: RequiredNodes = None) -> NetworkOutput:
    if self._image_mean is not None:
        x -= self._image_mean
    x = tf.math.divide(x, 255.0)
    input_size = x.shape[1], x.shape[2]
    conv3_sub1_proj = self.__big_images_branch(x=x, is_training=is_training)
    # Propagate `is_training` so batch normalization in these branches also
    # follows the requested mode instead of their `True` defaults.
    conv3_1 = self.__medium_images_branch(x, is_training=is_training)
    conv3_1_sub2_proj = bottleneck_conv2d(
        x=conv3_1, num_filters=128, activation=None,
        name='conv3_1_sub2_proj')
    conv_sub4 = self.__small_images_branch(
        conv3_1=conv3_1, input_size=input_size, is_training=is_training)
    conv_sub2 = self.__medium_small_branch_fusion(
        conv_sub4=conv_sub4, conv3_1_sub2_proj=conv3_1_sub2_proj,
        is_training=is_training)
    conv6_cls = self.__big_medium_branch_fusion(
        conv_sub2=conv_sub2, conv3_sub1_proj=conv3_sub1_proj)
    conv6_interp = upsample_bilinear(x=conv6_cls, zoom_factor=4)
    return self._construct_output(
        feedforward_output=conv6_interp, nodes_to_return=nodes_to_return)
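# Example use at inference time (hypothetical shapes and `backbone` handle;
# the surrounding graph/session setup is assumed to exist elsewhere):
#
#     x = tf.placeholder(tf.float32, shape=[None, 512, 1024, 3])
#     output = backbone.feed_forward(x=x, is_training=False)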
def _reduce_conv(x: tf.Tensor, filters: int, input_stride: int,
                 is_training: bool, name: Optional[str]) -> tf.Tensor:
    bn_name, conv_name = None, None
    if name is not None:
        conv_name = prepare_block_operation_name(name, '1x1_reduce', 'relu')
        bn_name = prepare_block_operation_name(name, '1x1_reduce', 'bn')
    if input_stride > 1:
        # Strided 1x1 convolution downsamples while reducing channels.
        stride = input_stride, input_stride
        conv = downsample_conv2d(
            x=x, num_filters=filters, kernel_size=(1, 1), strides=stride,
            name=conv_name)
    else:
        conv = bottleneck_conv2d(x=x, num_filters=filters, name=conv_name)
    return tf.layers.batch_normalization(
        inputs=conv, training=is_training, name=bn_name)
def __big_images_branch(self, x: tf.Tensor,
                        is_training: bool = True) -> tf.Tensor:
    conv1_sub1 = downsample_conv2d(
        x=x, num_filters=32, kernel_size=(3, 3), name='conv1_sub1')
    conv1_sub1_bn = tf.layers.batch_normalization(
        inputs=conv1_sub1, training=is_training, name='conv1_sub1_bn')
    conv2_sub1 = downsample_conv2d(
        x=conv1_sub1_bn, num_filters=32, kernel_size=(3, 3),
        name='conv2_sub1')
    conv2_sub1_bn = tf.layers.batch_normalization(
        inputs=conv2_sub1, training=is_training, name='conv2_sub1_bn')
    conv3_sub1 = downsample_conv2d(
        x=conv2_sub1_bn, num_filters=64, kernel_size=(3, 3),
        name='conv3_sub1')
    conv3_sub1_bn = tf.layers.batch_normalization(
        inputs=conv3_sub1, training=is_training, name='conv3_sub1_bn')
    conv3_sub1_proj = bottleneck_conv2d(
        x=conv3_sub1_bn, num_filters=128, activation=None,
        name='conv3_sub1_proj')
    return tf.layers.batch_normalization(
        inputs=conv3_sub1_proj, training=is_training,
        name='conv3_sub1_proj_bn')
def training_pass(self, x: tf.Tensor, y: tf.Tensor) -> tf.Operation:
    nodes_to_return = ['medium_small_fusion', 'big_medium_fusion', 'cls']
    model_output = self.feed_forward(
        x=x, is_training=True, nodes_to_return=nodes_to_return)
    medium_small_fusion = model_output['medium_small_fusion']
    big_medium_fusion = model_output['big_medium_fusion']
    cls = model_output['cls']
    # Auxiliary classifiers for the intermediate cascade outputs.
    medium_small_fusion = bottleneck_conv2d(
        x=medium_small_fusion, num_filters=self._output_classes,
        activation=None, name='medium_small_fusion_cls')
    big_medium_fusion = bottleneck_conv2d(
        x=big_medium_fusion, num_filters=self._output_classes,
        activation=None, name='big_medium_fusion_cls')
    cls_outputs = [medium_small_fusion, big_medium_fusion, cls]
    cls_weights = [self._lambda_1, self._lambda_2, self._lambda_3]
    return cascade_loss(
        cascade_output_nodes=cls_outputs, y=y, loss_weights=cls_weights,
        labels_to_ignore=self._ignore_labels)
def training_pass(self, x: tf.Tensor, y: tf.Tensor) -> tf.Operation:
    nodes_to_return = ['conv_sub4', 'conv_sub2', 'conv6_cls']
    model_output = self.feed_forward(
        x=x, is_training=True, nodes_to_return=nodes_to_return)
    conv_sub4 = model_output['conv_sub4']
    conv_sub2 = model_output['conv_sub2']
    conv6_cls = model_output['conv6_cls']
    # Auxiliary classifiers for the intermediate cascade outputs.
    conv_sub4_cls = bottleneck_conv2d(
        x=conv_sub4, num_filters=self._output_classes, activation=None,
        name='conv_sub4_cls')
    conv_sub2_cls = bottleneck_conv2d(
        x=conv_sub2, num_filters=self._output_classes, activation=None,
        name='conv_sub2_cls')
    cls_outputs = [conv_sub4_cls, conv_sub2_cls, conv6_cls]
    cls_weights = [self._lambda_1, self._lambda_2, self._lambda_3]
    return cascade_loss(
        cascade_output_nodes=cls_outputs, y=y,
        weight_decay=self.__weight_decay, loss_weights=cls_weights,
        labels_to_ignore=self._ignore_labels)
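# `cascade_loss` is assumed to implement ICNet-style cascade label guidance:
# a weighted sum of per-branch softmax cross-entropies, with the ground-truth
# label map resized to each branch's output resolution and the configured
# labels masked out. A minimal sketch under those assumptions (`y` is taken
# to be an int32 label map of shape [batch, height, width]; the weight-decay
# term is omitted):


def cascade_loss_sketch(cascade_output_nodes, y, loss_weights,
                        labels_to_ignore=None):
    total_loss = tf.constant(0.0)
    for logits, weight in zip(cascade_output_nodes, loss_weights):
        # Bring the labels down to this branch's spatial resolution.
        branch_size = int(logits.shape[1]), int(logits.shape[2])
        labels = tf.image.resize_nearest_neighbor(
            y[..., tf.newaxis], size=branch_size)
        labels = tf.squeeze(labels, axis=-1)
        mask = tf.ones_like(labels, dtype=tf.float32)
        if labels_to_ignore is not None:
            for ignored in labels_to_ignore:
                mask *= tf.cast(tf.not_equal(labels, ignored), tf.float32)
        # Ignored labels are clamped to class 0 and zeroed out by the mask.
        safe_labels = tf.where(mask > 0, labels, tf.zeros_like(labels))
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=safe_labels, logits=logits)
        branch_loss = (tf.reduce_sum(losses * mask)
                       / tf.maximum(tf.reduce_sum(mask), 1.0))
        total_loss += weight * branch_loss
    return total_loss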
def training_pass(self, x: tf.Tensor, y: tf.Tensor) -> tf.Operation:
    if self._image_mean is not None:
        # Normalize the reconstruction target the same way as the input.
        y -= self._image_mean
    y = tf.math.divide(y, 255.0)
    nodes_to_return = ['conv_sub4', 'conv_sub2', 'conv6_cls']
    model_output = self.feed_forward(
        x=x, is_training=True, nodes_to_return=nodes_to_return)
    conv_sub4 = model_output['conv_sub4']
    conv_sub2 = model_output['conv_sub2']
    conv6_cls = model_output['conv6_cls']
    conv_sub4_cls = bottleneck_conv2d(
        x=conv_sub4, num_filters=self._output_classes, activation=None,
        name='conv_sub4_cls')
    conv_sub2_cls = bottleneck_conv2d(
        x=conv_sub2, num_filters=self._output_classes, activation=None,
        name='conv_sub2_cls')
    cls_outputs = [conv_sub4_cls, conv_sub2_cls, conv6_cls]
    cls_weights = [self._lambda_1, self._lambda_2, self._lambda_3]
    return reconstruction_loss(
        cascade_output_nodes=cls_outputs, y=y, loss_weights=cls_weights)
def _increase_conv(x: tf.Tensor, filters: int, is_training: bool,
                   name: Optional[str]) -> tf.Tensor:
    bn_name, conv_name = None, None
    if name is not None:
        conv_name = prepare_block_operation_name(name, '1x1_increase')
        bn_name = prepare_block_operation_name(name, '1x1_increase', 'bn')
    conv = bottleneck_conv2d(
        x=x, num_filters=filters, activation=None, name=conv_name)
    return tf.layers.batch_normalization(
        inputs=conv, training=is_training, name=bn_name)
def _projection_conv(x: tf.Tensor, filters: int, stride: int,
                     name: Optional[str],
                     is_training: bool = True) -> tf.Tensor:
    # Derive both operation names from the original block name; reusing the
    # already-suffixed conv name for the BN name would double the suffix.
    conv_name, bn_name = None, None
    if name is not None:
        conv_name = prepare_block_operation_name(name, '1x1_proj')
        bn_name = prepare_block_operation_name(name, '1x1_proj', 'bn')
    stride = stride, stride
    projection = bottleneck_conv2d(
        x=x, num_filters=filters, strides=stride, activation=None,
        name=conv_name)
    return tf.layers.batch_normalization(
        inputs=projection, training=is_training, name=bn_name)
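# `_reduce_conv`, `_increase_conv` and `_projection_conv` follow the classic
# ResNet bottleneck layout. A sketch of how they presumably compose into one
# residual unit (`_middle_conv` is a hypothetical stand-in for the 3x3
# middle convolution, which is not shown in this section):
#
#     def residual_unit(x, filters, stride, is_training, name):
#         reduced = _reduce_conv(x, filters // 4, stride, is_training, name)
#         middle = _middle_conv(reduced, filters // 4, is_training, name)
#         increased = _increase_conv(middle, filters, is_training, name)
#         shortcut = _projection_conv(x, filters, stride, name, is_training)
#         return tf.nn.relu(increased + shortcut)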
def __medium_branch_tail(self, x: tf.Tensor, is_training: bool) -> tf.Tensor:
    h2_fs1 = bottleneck_conv2d(x=x, num_filters=64, name='h2_fs1')
    h2_fs_bn = tf.layers.batch_normalization(
        inputs=h2_fs1, training=is_training, name='h2_fs_bn')
    h2_conv3 = dim_hold_conv2d(
        x=h2_fs_bn, num_filters=128, kernel_size=(3, 3), name='h2_conv3')
    h2_fs2 = bottleneck_conv2d(x=h2_conv3, num_filters=64, name='h2_fs2')
    h2_conv4 = dim_hold_conv2d(
        x=h2_fs2, num_filters=128, kernel_size=(3, 3), name='h2_conv4')
    h2_fs3 = bottleneck_conv2d(x=h2_conv4, num_filters=64, name='h2_fs3')
    # Residual fusion of the first and last squeeze layers.
    fuse = h2_fs1 + h2_fs3
    fuse_bn = tf.layers.batch_normalization(
        inputs=fuse, training=is_training, name='fuse_bn')
    pp1 = pyramid_pool_fusion(
        x=fuse_bn, windows_shapes=[2, 3, 5], fuse_filters=128, name='h2_pp1')
    dilated_block1 = atrous_pyramid_encoder(
        x=pp1, output_filters=128, pyramid_heads_dilation_rate=[1, 2, 4, 8],
        use_residual_connection=False, name='h2_dilation_block')
    h2_dilated_block1_bn = tf.layers.batch_normalization(
        inputs=dilated_block1, training=is_training,
        name='h2_dilated_block1_bn')
    h2_fs4 = bottleneck_conv2d(
        x=h2_dilated_block1_bn, num_filters=64, name='h2_fs4')
    h2_conv5 = dim_hold_conv2d(
        x=h2_fs4, num_filters=128, kernel_size=(3, 3), name='h2_conv5')
    h2_fs5 = bottleneck_conv2d(x=h2_conv5, num_filters=64, name='h2_fs5')
    h2_conv6 = dim_hold_conv2d(
        x=h2_fs5, num_filters=256, kernel_size=(3, 3), name='h2_conv6')
    h2_fs6 = bottleneck_conv2d(x=h2_conv6, num_filters=128, name='h2_fs6')
    return tf.math.add(h2_fs6, pp1, name='h2_add')
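# `pyramid_pool_fusion` is assumed to pool its input at several window sizes,
# concatenate the pooled maps with the input, and fuse them back with a 1x1
# convolution. A minimal sketch under that assumption (not necessarily the
# repository's exact implementation):


def pyramid_pool_fusion_sketch(x: tf.Tensor, windows_shapes,
                               fuse_filters: int, name: str) -> tf.Tensor:
    pooled = [x]
    for window in windows_shapes:
        # Stride-1 SAME pooling keeps the spatial dimensions intact.
        pooled.append(tf.layers.average_pooling2d(
            inputs=x, pool_size=window, strides=1, padding='same'))
    fused = tf.concat(pooled, axis=-1)
    return tf.layers.conv2d(
        fused, filters=fuse_filters, kernel_size=1, padding='same', name=name)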
def __cascade_fusion_block(self,
                           smaller_input: tf.Tensor,
                           bigger_input: tf.Tensor,
                           is_training: bool,
                           output_filters: int,
                           base_name: str) -> tf.Tensor:
    upsampled = upsample_bilinear(x=smaller_input, zoom_factor=2)
    upsampled = dim_hold_conv2d(
        x=upsampled, num_filters=output_filters, kernel_size=(3, 3),
        name=f'{base_name}/fusion_conv')
    upsampled_bn = tf.layers.batch_normalization(
        inputs=upsampled, training=is_training,
        name=f'{base_name}/fusion_conv_bn')
    bigger_input = bottleneck_conv2d(
        x=bigger_input, num_filters=output_filters,
        name=f'{base_name}/bigger_input_fs')
    out = tf.math.add(upsampled_bn, bigger_input, name=f'{base_name}/add')
    return tf.nn.relu(out, name=f'{base_name}/relu')
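# Example (hypothetical shapes): fusing a [N, 16, 32, C_small] feature map
# into a [N, 32, 64, C_big] one. The smaller input is upsampled 2x to
# [N, 32, 64, C_small] and refined with a 3x3 conv + BN, the bigger input is
# projected to `output_filters` channels with a 1x1 conv, and the two are
# summed and passed through ReLU.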
def _atrous_encoder_input_head(x: tf.Tensor, filters: int,
                               activation: Optional[str],
                               use_separable_conv: bool,
                               name: Optional[str],
                               head_id: Optional[str]) -> tf.Tensor:
    if name is not None:
        name = prepare_block_operation_name(
            name, 'input_head', f'reduction_1x1_conv_{head_id}')
    if use_separable_conv:
        return separable_bottleneck_conv2d(
            x, filters, activation=activation, name=name)
    else:
        return bottleneck_conv2d(
            x, filters, activation=activation, name=name)
def __small_branch(self, x: tf.Tensor, is_training: bool) -> tf.Tensor:
    h3_fs1 = bottleneck_conv2d(x=x, num_filters=64, name='h3_fs1')
    h3_fs1_bn = tf.layers.batch_normalization(
        inputs=h3_fs1, training=is_training, name='h3_fs1_bn')
    # First pyramid pooling stage; the name is scoped to this (h3) branch.
    h3_pp1 = pyramid_pool_fusion(
        x=h3_fs1_bn, windows_shapes=[2, 3, 5], fuse_filters=128,
        name='h3_pp1')
    h3_fs2 = bottleneck_conv2d(x=h3_pp1, num_filters=64, name='h3_fs2')
    h3_conv1 = dim_hold_conv2d(
        x=h3_fs2, num_filters=128, kernel_size=(3, 3), name='h3_conv1')
    h3_fs3 = bottleneck_conv2d(x=h3_conv1, num_filters=64, name='h3_fs3')
    h3_conv2 = dim_hold_conv2d(
        x=h3_fs3, num_filters=256, kernel_size=(3, 3), name='h3_conv2')
    h3_fs4 = bottleneck_conv2d(x=h3_conv2, num_filters=128, name='h3_fs4')
    h3_add1 = tf.math.add(h3_pp1, h3_fs4, name='h3_add1')
    h3_conv3 = dim_hold_conv2d(
        x=h3_add1, num_filters=256, kernel_size=(3, 3), name='h3_conv3')
    h3_fs5 = bottleneck_conv2d(x=h3_conv3, num_filters=128, name='h3_fs5')
    h3_fs5_bn = tf.layers.batch_normalization(
        inputs=h3_fs5, training=is_training, name='h3_fs5_bn')
    h3_pp2 = pyramid_pool_fusion(
        x=h3_fs5_bn, windows_shapes=[2, 3, 5], fuse_filters=256,
        name='h3_pp2')
    h3_dilated_block_1 = atrous_pyramid_encoder(
        x=h3_pp2, output_filters=256,
        pyramid_heads_dilation_rate=[1, 2, 4, 8],
        use_residual_connection=False, name='h3_dilation_block_1')
    h3_dilated_block_1_bn = tf.layers.batch_normalization(
        inputs=h3_dilated_block_1, training=is_training,
        name='h3_dilated_block_1_bn')
    h3_fs6 = bottleneck_conv2d(
        x=h3_dilated_block_1_bn, num_filters=64, name='h3_fs6')
    h3_conv4 = dim_hold_conv2d(
        x=h3_fs6, num_filters=256, kernel_size=(3, 3), name='h3_conv4')
    h3_fs7 = bottleneck_conv2d(x=h3_conv4, num_filters=64, name='h3_fs7')
    h3_conv5 = dim_hold_conv2d(
        x=h3_fs7, num_filters=512, kernel_size=(3, 3), name='h3_conv5')
    h3_fs8 = bottleneck_conv2d(x=h3_conv5, num_filters=256, name='h3_fs8')
    h3_add2 = tf.math.add(h3_pp2, h3_fs8, name='h3_add2')
    h3_add2_bn = tf.layers.batch_normalization(
        inputs=h3_add2, training=is_training, name='h3_add2_bn')
    h3_fs9 = bottleneck_conv2d(x=h3_add2_bn, num_filters=128, name='h3_fs9')
    h3_conv6 = dim_hold_conv2d(
        x=h3_fs9, num_filters=512, kernel_size=(3, 3), name='h3_conv6')
    h3_fs10 = bottleneck_conv2d(x=h3_conv6, num_filters=128, name='h3_fs10')
    h3_add3 = tf.math.add(h3_fs9, h3_fs10, name='h3_add3')
    h3_add3_bn = tf.layers.batch_normalization(
        inputs=h3_add3, training=is_training, name='h3_add3_bn')
    h3_conv7 = dim_hold_conv2d(
        x=h3_add3_bn, num_filters=512, kernel_size=(3, 3), name='h3_conv7')
    h3_fs11 = bottleneck_conv2d(x=h3_conv7, num_filters=128, name='h3_fs11')
    h3_conv8 = dim_hold_conv2d(
        x=h3_fs11, num_filters=512, kernel_size=(3, 3), name='h3_conv8')
    h3_fs12 = bottleneck_conv2d(x=h3_conv8, num_filters=128, name='h3_fs12')
    h3_add4 = tf.math.add(h3_fs12, h3_add3_bn, name='h3_add4')
    h3_add4_bn = tf.layers.batch_normalization(
        inputs=h3_add4, training=is_training, name='h3_add4_bn')
    h3_conv9 = dim_hold_conv2d(
        x=h3_add4_bn, num_filters=768, kernel_size=(3, 3), name='h3_conv9')
    h3_fs13 = bottleneck_conv2d(x=h3_conv9, num_filters=128, name='h3_fs13')
    h3_conv10 = dim_hold_conv2d(
        x=h3_fs13, num_filters=768, kernel_size=(3, 3), name='h3_conv10')
    h3_fs14 = bottleneck_conv2d(x=h3_conv10, num_filters=128, name='h3_fs14')
    h3_add5 = tf.math.add(h3_fs14, h3_add4_bn, name='h3_add5')
    h3_add5_bn = tf.layers.batch_normalization(
        inputs=h3_add5, training=is_training, name='h3_add5_bn')
    h3_conv11 = dim_hold_conv2d(
        x=h3_add5_bn, num_filters=1024, kernel_size=(3, 3), name='h3_conv11')
    h3_fs15 = bottleneck_conv2d(x=h3_conv11, num_filters=256, name='h3_fs15')
    h3_conv12 = dim_hold_conv2d(
        x=h3_fs15, num_filters=1024, kernel_size=(3, 3), name='h3_conv12')
    h3_fs16 = bottleneck_conv2d(x=h3_conv12, num_filters=256, name='h3_fs16')
    h3_add6 = tf.math.add(h3_fs15, h3_fs16, name='h3_add6')
    return tf.layers.batch_normalization(
        inputs=h3_add6, training=is_training, name='h3_add6_bn')
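# `atrous_pyramid_encoder` (used by the branch methods above) is assumed to
# run parallel dilated convolutions, one head per dilation rate, and merge
# the heads, similar in spirit to ASPP. A minimal sketch under that
# assumption, covering only the `use_residual_connection=False` path:


def atrous_pyramid_encoder_sketch(x: tf.Tensor, output_filters: int,
                                  pyramid_heads_dilation_rate,
                                  name: str) -> tf.Tensor:
    heads = []
    for rate in pyramid_heads_dilation_rate:
        # Each head sees a different receptive field via its dilation rate.
        heads.append(tf.layers.conv2d(
            inputs=x, filters=output_filters, kernel_size=3,
            dilation_rate=rate, padding='same', activation=tf.nn.relu,
            name=f'{name}/head_{rate}'))
    merged = tf.concat(heads, axis=-1)
    # Fuse the concatenated heads back to the requested channel count.
    return tf.layers.conv2d(
        merged, filters=output_filters, kernel_size=1, padding='same',
        name=f'{name}/fuse')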