def __medium_images_branch(self, x: tf.Tensor, is_training: bool = True) -> tf.Tensor:
    """
    Build the medium-resolution branch on top of :x.

    The input is first shrunk bilinearly (shrink_factor=2), then passed
    through a downsampling 3x3 convolution and two dimension-holding 3x3
    convolutions (32, 32 and 64 filters), each followed by batch
    normalization. The result is max-pooled with a 3x3 window and handed to
    the residual encoder chain configured by
    ICNetBackbone.__MEDIUM_BRANCH_ENCODERS_CONFIGS.

    :is_training is forwarded to every batch normalization layer and to the
    residual encoder chain.
    """
    kernel_3x3 = (3, 3)

    def batch_norm(inputs: tf.Tensor, layer_name: str) -> tf.Tensor:
        return tf.layers.batch_normalization(
            inputs=inputs, training=is_training, name=layer_name)

    net = downsample_bilinear(x=x, shrink_factor=2)
    net = downsample_conv2d(
        x=net, num_filters=32, kernel_size=kernel_3x3, name='conv1_1_3x3_s2')
    net = batch_norm(net, 'conv1_1_3x3_s2_bn')

    # Two dimension-holding convolutions, each normalized right after.
    for conv_name, filters in (('conv1_2_3x3', 32), ('conv1_3_3x3', 64)):
        net = dim_hold_conv2d(
            x=net, num_filters=filters, kernel_size=kernel_3x3, name=conv_name)
        net = batch_norm(net, f'{conv_name}_bn')

    net = max_pool2d(x=net, window_shape=(3, 3), name='pool1_3x3_s2')
    return self.__residual_encoder_chain(
        x=net,
        encoders_configs=ICNetBackbone.__MEDIUM_BRANCH_ENCODERS_CONFIGS,
        is_training=is_training)
def __medium_branch_tail(self, x: tf.Tensor, is_training: bool) -> tf.Tensor:
    """
    Build the tail of the medium branch (layers prefixed with 'h2_').

    Structure, in order of graph construction:
      * bottleneck 'h2_fs1', whose raw output is later summed with 'h2_fs3'
        (the sum is batch-normalized as 'fuse_bn'),
      * pyramid pooling fusion 'h2_pp1' followed by the atrous pyramid
        encoder 'h2_dilation_block' (no residual connection),
      * a bottleneck / 3x3 convolution stack ending in 'h2_fs6', which is
        added to the pyramid pooling output ('h2_add').

    :is_training is forwarded to every batch normalization layer.
    """
    def batch_norm(inputs: tf.Tensor, layer_name: str) -> tf.Tensor:
        return tf.layers.batch_normalization(
            inputs=inputs, training=is_training, name=layer_name)

    def squeeze(inputs: tf.Tensor, filters: int, layer_name: str) -> tf.Tensor:
        return bottleneck_conv2d(x=inputs, num_filters=filters, name=layer_name)

    def conv3x3(inputs: tf.Tensor, filters: int, layer_name: str) -> tf.Tensor:
        return dim_hold_conv2d(
            x=inputs, num_filters=filters, kernel_size=(3, 3), name=layer_name)

    # First stack; the un-normalized 'h2_fs1' output is the skip input.
    skip = squeeze(x, 64, 'h2_fs1')
    net = batch_norm(skip, 'h2_fs_bn')
    net = conv3x3(net, 128, 'h2_conv3')
    net = squeeze(net, 64, 'h2_fs2')
    net = conv3x3(net, 128, 'h2_conv4')
    net = squeeze(net, 64, 'h2_fs3')
    net = batch_norm(skip + net, 'fuse_bn')

    # Pyramid pooling; its output is reused by the final addition.
    pooled = pyramid_pool_fusion(
        x=net, windows_shapes=[2, 3, 5], fuse_filters=128, name='h2_pp1')

    net = atrous_pyramid_encoder(
        x=pooled,
        output_filters=128,
        pyramid_heads_dilation_rate=[1, 2, 4, 8],
        use_residual_connection=False,
        name='h2_dilation_block')
    net = batch_norm(net, 'h2_dilated_block1_bn')

    # Second stack, closed by the addition with the pooled tensor.
    net = squeeze(net, 64, 'h2_fs4')
    net = conv3x3(net, 128, 'h2_conv5')
    net = squeeze(net, 64, 'h2_fs5')
    net = conv3x3(net, 256, 'h2_conv6')
    net = squeeze(net, 128, 'h2_fs6')
    return tf.math.add(net, pooled, name='h2_add')
def _blending_layer(x: tf.Tensor,
                    output_filters: int,
                    kernel: int,
                    use_separable_conv: bool,
                    activation: Optional[str],
                    name: Optional[str],
                    output_is_last_op_in_block: bool) -> tf.Tensor:
    """
    Apply the blending convolution of a block to :x.

    The convolution uses a square (:kernel, :kernel) window and
    :output_filters filters. It is created with separable_conv2d when
    :use_separable_conv is exactly True and with dim_hold_conv2d otherwise.

    When :name is given, the operation name is built by
    prepare_block_operation_name from :name and the postfix 'out' (if
    :output_is_last_op_in_block is truthy) or 'blend'. When :name is None
    the layer is created without an explicit name.
    """
    if name is None:
        operation_name = None
    else:
        postfix = 'out' if output_is_last_op_in_block else 'blend'
        operation_name = prepare_block_operation_name(name, postfix)

    # Identity check against True is kept on purpose: only the literal
    # True value selects the separable variant.
    conv_layer = separable_conv2d if use_separable_conv is True else dim_hold_conv2d
    return conv_layer(
        x=x,
        num_filters=output_filters,
        kernel_size=(kernel, kernel),
        activation=activation,
        name=operation_name)
def __prediction_branch(self, big_medium_fusion: tf.Tensor) -> tf.Tensor:
    """
    Produce the per-class output map from the fused big/medium features.

    :big_medium_fusion is upsampled bilinearly with zoom_factor=2 and then
    passed through the 3x3 convolution named 'cls', which has
    self._output_classes filters and no activation.
    """
    upsampled = upsample_bilinear(x=big_medium_fusion, zoom_factor=2)
    class_scores = dim_hold_conv2d(
        x=upsampled,
        num_filters=self._output_classes,
        kernel_size=(3, 3),
        activation=None,
        name='cls')
    return class_scores
def __medium_branch_head(self, x: tf.Tensor, is_training: bool) -> tf.Tensor:
    """
    Build the head of the medium branch (layers prefixed with 'h2_').

    Two identical stages are stacked; each one is a downsampling 3x3
    convolution, a dimension-holding 3x3 convolution and a batch
    normalization named after the second convolution with the '_bn' suffix:

      'h2_sub1' (16 filters) -> 'h2_conv1' (16 filters) -> 'h2_conv1_bn'
      'h2_sub2' (32 filters) -> 'h2_conv2' (64 filters) -> 'h2_conv2_bn'

    :is_training is forwarded to both batch normalization layers.
    """
    # (downsample name, downsample filters, conv name, conv filters)
    stages = (
        ('h2_sub1', 16, 'h2_conv1', 16),
        ('h2_sub2', 32, 'h2_conv2', 64),
    )
    net = x
    for down_name, down_filters, conv_name, conv_filters in stages:
        net = downsample_conv2d(
            x=net, num_filters=down_filters, kernel_size=(3, 3), name=down_name)
        net = dim_hold_conv2d(
            x=net, num_filters=conv_filters, kernel_size=(3, 3), name=conv_name)
        net = tf.layers.batch_normalization(
            inputs=net, training=is_training, name=f'{conv_name}_bn')
    return net
def pyramid_pool_fusion(x: tf.Tensor,
                        windows_shapes: List[int],
                        fuse_filters: int,
                        fuse_kernel: Tuple[int, int] = (3, 3),
                        pooling_type: str = 'AVG',
                        pooling_strides: Tuple[int, int] = (2, 2),
                        fusion_method: FusionMethod = FusionMethod.CONCAT,
                        name: Optional[str] = None) -> tf.Tensor:
    """
    Layer performs pooling operations with square kernels - one pooling per
    entry of :windows_shapes (an entry w gives the window (w, w)), each using
    :pooling_type and :pooling_strides.

    Pooling results are then merged according to :fusion_method:
      * FusionMethod.CONCAT - concatenation along the last (channel) axis,
      * any other value - element-wise sum of all pooling results.

    Finally the fusion convolution (dim_hold_conv2d with :fuse_filters
    filters and kernel size given by :fuse_kernel) is applied to the merged
    tensor and its output is returned.

    When :name is given, pooling layers are named '<name>/pooling_<index>'
    (index = position in :windows_shapes) and the fusion convolution is named
    '<name>/fuse_conv'; otherwise no explicit names are passed.

    Raises ValueError when :windows_shapes is empty - there would be nothing
    to fuse.
    """
    # Materialize first so that any iterable is accepted and the emptiness
    # check does not rely on the truthiness of the passed container.
    window_sizes = list(windows_shapes)
    if not window_sizes:
        raise ValueError(
            'windows_shapes must contain at least one pooling window size.')

    def pooling_name(pool_layer_id: int) -> Optional[str]:
        if name is None:
            return None
        return f'{name}/pooling_{pool_layer_id}'

    pooling_layers = [
        pool2d(x,
               window_shape=(window_size, window_size),
               pooling_type=pooling_type,
               strides=pooling_strides,
               name=pooling_name(pool_layer_id))
        for pool_layer_id, window_size in enumerate(window_sizes)
    ]

    if fusion_method is FusionMethod.CONCAT:
        fusion = tf.concat(pooling_layers, axis=-1)
    else:
        # Left fold with `+`, so the pooling results are summed in order.
        fusion = reduce(lambda acc, elem: acc + elem, pooling_layers)

    fuse_conv_name = f'{name}/fuse_conv' if name is not None else None
    return dim_hold_conv2d(x=fusion,
                           num_filters=fuse_filters,
                           kernel_size=fuse_kernel,
                           name=fuse_conv_name)
def __cascade_fusion_block(
    self,
    smaller_input: tf.Tensor,
    bigger_input: tf.Tensor,
    is_training: bool,
    output_filters: int,
    base_name: str,
) -> tf.Tensor:
    """
    Fuse a lower-resolution feature map with a higher-resolution one.

    :smaller_input is upsampled bilinearly (zoom_factor=2), convolved with a
    3x3 kernel to :output_filters filters and batch-normalized.
    :bigger_input is projected to :output_filters filters with a bottleneck
    convolution. Both tensors are added and passed through ReLU.

    All named operations live under the '<base_name>/' prefix:
    'fusion_conv', 'fusion_conv_bn', 'bigger_input_fs', 'add' and 'relu'.
    """
    def scoped(suffix: str) -> str:
        return f'{base_name}/{suffix}'

    # Path of the smaller input: upsample -> conv -> batch norm.
    small_path = upsample_bilinear(x=smaller_input, zoom_factor=2)
    small_path = dim_hold_conv2d(
        x=small_path,
        num_filters=output_filters,
        kernel_size=(3, 3),
        name=scoped('fusion_conv'))
    small_path = tf.layers.batch_normalization(
        inputs=small_path, training=is_training, name=scoped('fusion_conv_bn'))

    # Path of the bigger input: channel projection only.
    big_path = bottleneck_conv2d(
        x=bigger_input, num_filters=output_filters, name=scoped('bigger_input_fs'))

    fused = tf.math.add(small_path, big_path, name=scoped('add'))
    return tf.nn.relu(fused, name=scoped('relu'))
def __small_branch(self, x: tf.Tensor, is_training: bool) -> tf.Tensor:
    """
    Build the small branch (layers prefixed with 'h3_').

    Structure, in order of graph construction:
      1. bottleneck + batch norm, then pyramid pooling fusion 'h3_pp1',
      2. bottleneck / 3x3 convolution stack added back to 'h3_pp1'
         ('h3_add1'),
      3. second pyramid pooling fusion 'h3_pp2' followed by the atrous
         pyramid encoder 'h3_dilation_block_1' (no residual connection),
      4. bottleneck / 3x3 convolution stack added back to 'h3_pp2'
         ('h3_add2'),
      5. four further residual stages ('h3_add3' ... 'h3_add6'), each
         followed by batch normalization; the last normalization
         ('h3_add6_bn') is the returned tensor.

    :is_training is forwarded to every batch normalization layer.

    NOTE: the first pyramid pooling layer used to be named 'h2_pp1', which
    is the name used by the medium branch tail and breaks the 'h3_' naming
    of this branch. It is named 'h3_pp1' now. Graph nodes (and presumably
    variables) created under that name are renamed accordingly, so
    checkpoints saved with the old name may need remapping - TODO confirm.
    """
    def batch_norm(inputs: tf.Tensor, layer_name: str) -> tf.Tensor:
        return tf.layers.batch_normalization(
            inputs=inputs, training=is_training, name=layer_name)

    # --- Stage 1: entry bottleneck and first pyramid pooling -------------
    h3_fs1 = bottleneck_conv2d(x=x, num_filters=64, name='h3_fs1')
    h3_fs1_bn = batch_norm(h3_fs1, 'h3_fs1_bn')
    h3_pp1 = pyramid_pool_fusion(
        x=h3_fs1_bn, windows_shapes=[2, 3, 5], fuse_filters=128, name='h3_pp1')

    # --- Stage 2: residual stack around h3_pp1 ----------------------------
    h3_fs2 = bottleneck_conv2d(x=h3_pp1, num_filters=64, name='h3_fs2')
    h3_conv1 = dim_hold_conv2d(
        x=h3_fs2, num_filters=128, kernel_size=(3, 3), name='h3_conv1')
    h3_fs3 = bottleneck_conv2d(x=h3_conv1, num_filters=64, name='h3_fs3')
    h3_conv2 = dim_hold_conv2d(
        x=h3_fs3, num_filters=256, kernel_size=(3, 3), name='h3_conv2')
    h3_fs4 = bottleneck_conv2d(x=h3_conv2, num_filters=128, name='h3_fs4')
    h3_add1 = tf.math.add(h3_pp1, h3_fs4, name='h3_add1')

    # --- Stage 3: second pyramid pooling and atrous pyramid encoder ------
    h3_conv3 = dim_hold_conv2d(
        x=h3_add1, num_filters=256, kernel_size=(3, 3), name='h3_conv3')
    h3_fs5 = bottleneck_conv2d(x=h3_conv3, num_filters=128, name='h3_fs5')
    h3_fs5_bn = batch_norm(h3_fs5, 'h3_fs5_bn')
    h3_pp2 = pyramid_pool_fusion(
        x=h3_fs5_bn, windows_shapes=[2, 3, 5], fuse_filters=256, name='h3_pp2')
    h3_dilated_block_1 = atrous_pyramid_encoder(
        x=h3_pp2,
        output_filters=256,
        pyramid_heads_dilation_rate=[1, 2, 4, 8],
        use_residual_connection=False,
        name='h3_dilation_block_1')
    h3_dilated_block_1_bn = batch_norm(h3_dilated_block_1, 'h3_dilated_block_1_bn')

    # --- Stage 4: residual stack around h3_pp2 ----------------------------
    h3_fs6 = bottleneck_conv2d(x=h3_dilated_block_1_bn, num_filters=64, name='h3_fs6')
    h3_conv4 = dim_hold_conv2d(
        x=h3_fs6, num_filters=256, kernel_size=(3, 3), name='h3_conv4')
    h3_fs7 = bottleneck_conv2d(x=h3_conv4, num_filters=64, name='h3_fs7')
    h3_conv5 = dim_hold_conv2d(
        x=h3_fs7, num_filters=512, kernel_size=(3, 3), name='h3_conv5')
    h3_fs8 = bottleneck_conv2d(x=h3_conv5, num_filters=256, name='h3_fs8')
    h3_add2 = tf.math.add(h3_pp2, h3_fs8, name='h3_add2')
    h3_add2_bn = batch_norm(h3_add2, 'h3_add2_bn')

    # --- Stage 5a: residual stage, skip taken from h3_fs9 -----------------
    h3_fs9 = bottleneck_conv2d(x=h3_add2_bn, num_filters=128, name='h3_fs9')
    h3_conv6 = dim_hold_conv2d(
        x=h3_fs9, num_filters=512, kernel_size=(3, 3), name='h3_conv6')
    h3_fs10 = bottleneck_conv2d(x=h3_conv6, num_filters=128, name='h3_fs10')
    h3_add3 = tf.math.add(h3_fs9, h3_fs10, name='h3_add3')
    h3_add3_bn = batch_norm(h3_add3, 'h3_add3_bn')

    # --- Stage 5b: residual stage, skip taken from h3_add3_bn -------------
    h3_conv7 = dim_hold_conv2d(
        x=h3_add3_bn, num_filters=512, kernel_size=(3, 3), name='h3_conv7')
    h3_fs11 = bottleneck_conv2d(x=h3_conv7, num_filters=128, name='h3_fs11')
    h3_conv8 = dim_hold_conv2d(
        x=h3_fs11, num_filters=512, kernel_size=(3, 3), name='h3_conv8')
    h3_fs12 = bottleneck_conv2d(x=h3_conv8, num_filters=128, name='h3_fs12')
    h3_add4 = tf.math.add(h3_fs12, h3_add3_bn, name='h3_add4')
    h3_add4_bn = batch_norm(h3_add4, 'h3_add4_bn')

    # --- Stage 5c: residual stage, skip taken from h3_add4_bn -------------
    h3_conv9 = dim_hold_conv2d(
        x=h3_add4_bn, num_filters=768, kernel_size=(3, 3), name='h3_conv9')
    h3_fs13 = bottleneck_conv2d(x=h3_conv9, num_filters=128, name='h3_fs13')
    h3_conv10 = dim_hold_conv2d(
        x=h3_fs13, num_filters=768, kernel_size=(3, 3), name='h3_conv10')
    h3_fs14 = bottleneck_conv2d(x=h3_conv10, num_filters=128, name='h3_fs14')
    h3_add5 = tf.math.add(h3_fs14, h3_add4_bn, name='h3_add5')
    h3_add5_bn = batch_norm(h3_add5, 'h3_add5_bn')

    # --- Stage 5d: residual stage, skip taken from h3_fs15 ----------------
    h3_conv11 = dim_hold_conv2d(
        x=h3_add5_bn, num_filters=1024, kernel_size=(3, 3), name='h3_conv11')
    h3_fs15 = bottleneck_conv2d(x=h3_conv11, num_filters=256, name='h3_fs15')
    h3_conv12 = dim_hold_conv2d(
        x=h3_fs15, num_filters=1024, kernel_size=(3, 3), name='h3_conv12')
    h3_fs16 = bottleneck_conv2d(x=h3_conv12, num_filters=256, name='h3_fs16')
    h3_add6 = tf.math.add(h3_fs15, h3_fs16, name='h3_add6')
    return batch_norm(h3_add6, 'h3_add6_bn')