def residual_block(self, input_x, output_channels=None, stride=1, scope_name=None):
    """Pre-activation bottleneck residual block.

    Structure:
    1. batch_norm + relu
    2. 1x1 conv + batch_norm + relu
    3. 3x3 conv with the given stride + batch_norm + relu
    4. 1x1 conv
    If the input and output channel counts match and stride == 1, the output
    of step 4 is added to the block input (identity shortcut).
    If the channel counts differ or stride != 1, step 5 is used instead:
    5. a 1x1 conv with the given stride is applied to the output of step 1,
       and its result is added to the output of step 4 (projection shortcut).
    """
    input_channels = input_x.get_shape()[-1].value
    if output_channels is None:
        output_channels = input_channels
    scope = arg_scope_.arg_scope(is_training=self.is_training)
    with tf.variable_scope(scope_name):
        with slim.arg_scope(scope):
            x_1 = slim.batch_norm(input_x, scope='pre_batch_norm')
            x_ = slim.conv2d(x_1, output_channels // 4, [1, 1], stride=1,
                             padding='SAME', scope='conv1')
            x_ = slim.conv2d(x_, output_channels // 4, [3, 3], stride=stride,
                             padding='SAME', scope='conv2')
            x_ = slim.conv2d(x_, output_channels, [1, 1], stride=1,
                             padding='SAME', normalizer_fn=None,
                             activation_fn=None, scope='conv3')
            if (input_channels != output_channels) or (stride != 1):
                # Projection shortcut: match the channels/spatial size of the main path.
                input_x = slim.conv2d(x_1, output_channels, [1, 1],
                                      stride=stride, padding='SAME',
                                      normalizer_fn=None, activation_fn=None,
                                      scope='conv_ad')
            output = x_ + input_x
    return output
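# A minimal, hypothetical shape check for residual_block (TF1.x graph mode).
# It assumes the enclosing class is named ResidualBlock and takes is_training
# in its constructor; those names are illustrative, not from this repository.
import tensorflow as tf

block = ResidualBlock(is_training=True)    # assumed constructor

images = tf.placeholder(tf.float32, [None, 64, 64, 64])   # NHWC, 64 channels

# Identity shortcut: channels unchanged, stride 1 -> same spatial size.
same = block.residual_block(images, 64, stride=1, scope_name='rb_same')
# Projection shortcut: channels doubled, spatial size halved by stride=2.
down = block.residual_block(images, 128, stride=2, scope_name='rb_down')

print(same.get_shape())   # (?, 64, 64, 64)
print(down.get_shape())   # (?, 32, 32, 128)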
def batch_norm(self, input_x, is_training=True, scope_name=None):
    sc = arg_scope_.arg_scope(is_training=is_training)
    with tf.variable_scope(scope_name):
        with slim.arg_scope(sc):
            out = slim.batch_norm(input_x, scope='batch_norm')
    return out
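# The arg_scope_ module is not shown in this file. A plausible minimal sketch,
# consistent with how slim.arg_scope consumes it above, might look like the
# following; the weight-decay and batch-norm settings are illustrative
# assumptions, not the repository's actual values.
import tensorflow as tf
import tensorflow.contrib.slim as slim


def arg_scope(is_training=True, weight_decay=1e-4, bn_decay=0.997):
    """Return a slim arg_scope configuring conv/deconv/batch_norm defaults."""
    batch_norm_params = {
        'is_training': is_training,   # freeze moving statistics at inference
        'decay': bn_decay,
        'epsilon': 1e-5,
        'scale': True,
    }
    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params) as sc:
            return sc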
def attention_block_stage0(self, input_x, output_channels, sub_scope_num):
    with tf.variable_scope(
            'attention_block_stage0-{}'.format(sub_scope_num)):
        # 1. Residual block in front of the attention module.
        x = self.residual_block.residual_block(input_x, output_channels,
                                               scope_name='head_block')
        # 2. The output x splits into two branches.
        # 2.1 Trunk branch: two stacked residual blocks.
        out_trunk = self.residual_block.residual_block(
            x, output_channels, scope_name='trunk_block1')
        out_trunk = self.residual_block.residual_block(
            out_trunk, output_channels, scope_name='trunk_block2')
        # 2.2 Mask (attention) branch: max_pool followed by a residual block.
        out_mpool1 = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME',
                                     scope='max_pool1')
        out_softmax1 = self.residual_block.residual_block(
            out_mpool1, output_channels, scope_name='out_softmax1')
        # 3. out_softmax1 splits into two branches.
        # 3.1 Skip-connection branch.
        out_skip_connection1 = self.residual_block.residual_block(
            out_softmax1, output_channels, scope_name='out_skip_connection1')
        # 3.2 Attention branch: max_pool followed by a residual block.
        out_mpool2 = slim.max_pool2d(out_softmax1, [3, 3], stride=2,
                                     padding='SAME', scope='max_pool2')
        out_softmax2 = self.residual_block.residual_block(
            out_mpool2, output_channels, scope_name='out_softmax2')
        # 4. out_softmax2 splits into two branches.
        # 4.1 Skip-connection branch.
        out_skip_connection2 = self.residual_block.residual_block(
            out_softmax2, output_channels, scope_name='out_skip_connection2')
        # 4.2 Attention branch: max_pool followed by a residual block.
        out_mpool3 = slim.max_pool2d(out_softmax2, [3, 3], stride=2,
                                     padding='SAME', scope='max_pool3')
        out_softmax3 = self.residual_block.residual_block(
            out_mpool3, output_channels, scope_name='out_softmax3')
        # 5. out_softmax3 splits into two branches.
        # 5.1 Skip-connection branch.
        out_skip_connection3 = self.residual_block.residual_block(
            out_softmax3, output_channels, scope_name='out_skip_connection3')
        # 5.2 Attention branch: max_pool followed by a residual block.
        out_pool4 = slim.max_pool2d(out_softmax3, [3, 3], stride=2,
                                    padding='SAME', scope='max_pool4')
        out_softmax4 = self.residual_block.residual_block(
            out_pool4, output_channels, scope_name='out_softmax4-1')
        # ---------------- Branching done, now merge ----------------
        # Apply a residual block to out_softmax4, upsample, then add
        # out_softmax3 and out_skip_connection3.
        # 6. interpolation4
        out_softmax4 = self.residual_block.residual_block(
            out_softmax4, output_channels, scope_name='out_softmax4-2')
        out_interp4 = tf.image.resize(
            out_softmax4, out_softmax3.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_softmax3
        out = out_interp4 + out_skip_connection3
        # Apply a residual block to out_interp4, upsample, then add
        # out_softmax2 and out_skip_connection2.
        # 7. interpolation3
        out_softmax5 = self.residual_block.residual_block(
            out, output_channels, scope_name='out_softmax5')
        out_interp3 = tf.image.resize(
            out_softmax5, out_softmax2.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_softmax2
        out = out_interp3 + out_skip_connection2
        # Apply a residual block to out_interp3, upsample, then add
        # out_softmax1 and out_skip_connection1.
        # 8. interpolation2
        out_softmax6 = self.residual_block.residual_block(
            out, output_channels, scope_name='out_softmax6')
        out_interp2 = tf.image.resize(
            out_softmax6, out_softmax1.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_softmax1
        out = out_interp2 + out_skip_connection1
        # Apply a residual block to out_interp2, upsample, then add out_trunk.
        # 9. interpolation1
        out_softmax7 = self.residual_block.residual_block(
            out, output_channels, scope_name='out_softmax7')
        out_interp1 = tf.image.resize(
            out_softmax7, out_trunk.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_trunk
        # batch_norm + relu + conv + batch_norm + relu + conv + sigmoid
        # 10. out_softmax8
        with tf.variable_scope('out_softmax8'):
            with slim.arg_scope(
                    arg_scope_.arg_scope(is_training=self.is_training)):
                out = slim.batch_norm(out_interp1, scope='batch_norm')
                out = slim.conv2d(out, output_channels, [1, 1], stride=1,
                                  scope='conv1')
                out = slim.conv2d(out, output_channels, [1, 1], stride=1,
                                  normalizer_fn=None, activation_fn=None,
                                  scope='conv2')
                out_softmax8 = tf.nn.sigmoid(out)
        # 11. Attention: element-wise (1 + mask) * trunk.
        out = (1 + out_softmax8) * out_trunk
        # 12. last_out: final residual block.
        out_last = self.residual_block.residual_block(
            out, output_channels, scope_name='last_out')
    return out_last
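# The merge in steps 10-12 is the attention-residual form (1 + M(x)) * T(x)
# from the Residual Attention Network paper: the sigmoid mask M lies in (0, 1),
# so the trunk features T are scaled by a factor in (1, 2) and are never
# suppressed toward zero, which keeps deep stacks of attention modules trainable.
# A small NumPy sketch of that fusion (illustrative only):
import numpy as np


def attention_residual(mask_logits, trunk):
    """(1 + sigmoid(mask_logits)) * trunk: the mask can boost, never zero out."""
    mask = 1.0 / (1.0 + np.exp(-mask_logits))     # sigmoid, in (0, 1)
    return (1.0 + mask) * trunk


trunk = np.array([1.0, -2.0, 0.5])
weak_mask = np.array([-10.0, -10.0, -10.0])       # mask ~ 0 everywhere
strong_mask = np.array([10.0, 10.0, 10.0])        # mask ~ 1 everywhere

print(attention_residual(weak_mask, trunk))       # ~ trunk (identity-like)
print(attention_residual(strong_mask, trunk))     # ~ 2 * trunk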
def attention_block_stage2(self, input_x, output_channels, sub_scope_num):
    with tf.variable_scope(
            'attention_block_stage2-{}'.format(sub_scope_num)):
        # 1. Residual block in front of the attention module.
        x = self.residual_block.residual_block(input_x, output_channels,
                                               scope_name='head_block')
        # 2. The output x splits into two branches.
        # 2.1 Trunk branch: two stacked residual blocks.
        out_trunk = self.residual_block.residual_block(
            x, output_channels, scope_name='trunk_block1')
        out_trunk = self.residual_block.residual_block(
            out_trunk, output_channels, scope_name='trunk_block2')
        # 2.2 Mask (attention) branch: max_pool followed by a residual block.
        out_mpool1 = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME',
                                     scope='max_pool1')
        out_softmax1 = self.residual_block.residual_block(
            out_mpool1, output_channels, scope_name='out_softmax1')
        # 3. out_softmax1 splits into two branches.
        # 3.1 Skip-connection branch.
        out_skip_connection1 = self.residual_block.residual_block(
            out_softmax1, output_channels, scope_name='out_skip_connection1')
        # 3.2 Attention branch: max_pool followed by a residual block.
        out_mpool2 = slim.max_pool2d(out_softmax1, [3, 3], stride=2,
                                     padding='SAME', scope='max_pool2')
        out_softmax2 = self.residual_block.residual_block(
            out_mpool2, output_channels, scope_name='out_softmax2-1')
        # ---------------- Branching done, now merge ----------------
        # Apply a residual block to out_softmax2, upsample, then add
        # out_softmax1 and out_skip_connection1.
        # 4. interpolation2
        out_softmax2 = self.residual_block.residual_block(
            out_softmax2, output_channels, scope_name='out_softmax2-2')
        out_interp2 = tf.image.resize(
            out_softmax2, out_softmax1.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_softmax1
        out = out_interp2 + out_skip_connection1
        # Apply a residual block to out_interp2, upsample, then add out_trunk.
        # 5. interpolation1
        out_softmax3 = self.residual_block.residual_block(
            out, output_channels, scope_name='out_softmax3')
        out_interp1 = tf.image.resize(
            out_softmax3, out_trunk.get_shape()[1:3],
            tf.image.ResizeMethod.BILINEAR) + out_trunk
        # batch_norm + relu + conv + batch_norm + relu + conv + sigmoid
        # 6. out_softmax4
        with tf.variable_scope('out_softmax4'):
            with slim.arg_scope(
                    arg_scope_.arg_scope(is_training=self.is_training)):
                out = slim.batch_norm(out_interp1, scope='batch_norm')
                out = slim.conv2d(out, output_channels, [1, 1], stride=1,
                                  scope='conv1')
                out = slim.conv2d(out, output_channels, [1, 1], stride=1,
                                  normalizer_fn=None, activation_fn=None,
                                  scope='conv2')
                out_softmax4 = tf.nn.sigmoid(out)
        # 7. Attention: element-wise (1 + mask) * trunk.
        out = (1 + out_softmax4) * out_trunk
        # 8. last_out: final residual block.
        out_last = self.residual_block.residual_block(
            out, output_channels, scope_name='last_out')
    return out_last
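# The only structural difference from attention_block_stage0 is the depth of the
# mask branch: stage 0 downsamples four times (for large feature maps) while this
# stage downsamples twice, and each tf.image.resize restores exactly the pre-pool
# spatial size so the element-wise adds line up. A quick sketch of that size
# bookkeeping, assuming SAME-padded stride-2 pooling (ceil division); the 80*60
# starting size is the 1/8 feature map mentioned in interface() below and is used
# here only for comparison.
import math


def mask_branch_sizes(height, width, num_pools):
    """Spatial sizes after each SAME-padded stride-2 max pool."""
    sizes = [(height, width)]
    for _ in range(num_pools):
        height, width = math.ceil(height / 2), math.ceil(width / 2)
        sizes.append((height, width))
    return sizes


print(mask_branch_sizes(60, 80, 4))   # [(60, 80), (30, 40), (15, 20), (8, 10), (4, 5)]
print(mask_branch_sizes(60, 80, 2))   # [(60, 80), (30, 40), (15, 20)]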
def interface(self, input_x):
    with tf.variable_scope('residual_attention_network'):
        # ResNet stem: 7x7 conv, stride 2, followed by a 3x3 max pool, stride 2.
        sc = arg_scope_.arg_scope(is_training=self.is_training)
        with slim.arg_scope(sc):
            conv1 = slim.conv2d(input_x, 64, [7, 7], stride=2, padding='SAME',
                                scope='conv')
            mpool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME',
                                     scope='maxpool')
        residual_out1 = self.residual_block.residual_block(
            mpool1, 64, scope_name='residual_block1')
        # Downsample to 1/8 of the input -> 80*60.
        residual_out2 = self.residual_block.residual_block(
            residual_out1, 128, stride=2, scope_name='residual_block2')
        # Attention stage 0.
        attention_out1 = self.attention_block_stage0.attention_block_stage0(
            residual_out2, 128, 1)
        # Decode attention_out1: upsample back to 1/2 of the input.
        with slim.arg_scope(
                arg_scope_.arg_scope(is_training=self.is_training)):
            decode_attention_out1 = slim.conv2d(attention_out1, 128, [1, 1],
                                                stride=1, scope='deconv1-1')
            decode_attention_out1 = slim.conv2d_transpose(
                decode_attention_out1, 64, [3, 3], stride=2, scope='deconv1-2')
            decode_attention_out1 = slim.conv2d(decode_attention_out1, 64,
                                                [1, 1], stride=1,
                                                scope='deconv1-3')
            decode_attention_out1 = slim.conv2d_transpose(
                decode_attention_out1, 1, [3, 3], stride=2,
                normalizer_fn=None, activation_fn=None, scope='deconv1-4')
        # Downsample again, to 1/16 of the input -> 40*30.
        residual_out3 = self.residual_block.residual_block(
            attention_out1, 256, stride=2, scope_name='residual_block3')
        # Attention stage 1.
        # attention_out1_1 = self.attention_block_stage1.attention_block_stage1(residual_out1, 256, 1)
        attention_out2_2 = self.attention_block_stage1.attention_block_stage1(
            residual_out3, 256, 2)
        # Decode attention_out2_2: upsample back to 1/4 of the input.
        with slim.arg_scope(
                arg_scope_.arg_scope(is_training=self.is_training)):
            decode_attention_out2 = slim.conv2d(attention_out2_2, 256, [1, 1],
                                                stride=1, scope='deconv2-1')
            decode_attention_out2 = slim.conv2d_transpose(
                decode_attention_out2, 128, [3, 3], stride=2, scope='deconv2-2')
            decode_attention_out2 = slim.conv2d(decode_attention_out2, 128,
                                                [1, 1], stride=1,
                                                scope='deconv2-3')
            decode_attention_out2 = slim.conv2d_transpose(
                decode_attention_out2, 1, [3, 3], stride=2,
                normalizer_fn=None, activation_fn=None, scope='deconv2-4')
        # Further downsampling plus attention stage 2 (currently disabled):
        # residual_out2 = self.residual_block.residual_block(
        #     attention_out1_2, 512, stride=2, scope_name='residual_block3')
        # # attention_out2_1 = self.attention_block_stage2.attention_block_stage2(residual_out2, 512, 1)
        # # attention_out2_2 = self.attention_block_stage2.attention_block_stage2(attention_out2_1, 512, 2)
        # attention_out2_3 = self.attention_block_stage2.attention_block_stage2(residual_out2, 512, 3)
        # # decode attention_out2
        # with slim.arg_scope(arg_scope_.arg_scope(is_training=self.is_training)):
        #     decode_attention_out2 = slim.conv2d_transpose(
        #         attention_out2_3, 64, [3, 3], stride=2, scope='deconv3-1')
        #     decode_attention_out2 = slim.conv2d_transpose(
        #         decode_attention_out2, 64, [3, 3], stride=2, scope='deconv3-2')
        #     decode_attention_out2 = slim.conv2d_transpose(
        #         decode_attention_out2, 64, [3, 3], stride=2, scope='deconv3-3')
        #     decode_attention_out2 = slim.conv2d_transpose(
        #         decode_attention_out2, 1, [3, 3], stride=2,
        #         normalizer_fn=None, activation_fn=None, scope='deconv3-4')
        # 30*23
        # 20*15
        residual_out4 = self.residual_block.residual_block(
            attention_out2_2, 512, stride=2, scope_name='residual_block4')
        residual_out5 = self.residual_block.residual_block(
            residual_out4, 512, scope_name='residual_block5')
        # 10*8
        residual_out6 = self.residual_block.residual_block(
            residual_out5, 1024, stride=2, scope_name='residual_block6')
        global_avg_out = tf.reduce_mean(residual_out6, [1, 2],
                                        name='global_avg_pool', keepdims=True)
        logits = slim.conv2d(global_avg_out, self.num_class, [1, 1],
                             activation_fn=None, normalizer_fn=None,
                             scope='logits')
    return decode_attention_out1, decode_attention_out2, logits
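# A minimal sketch of wiring the network up (TF1.x graph mode). It assumes the
# enclosing class is named ResidualAttentionNet, takes num_class and is_training
# in its constructor, and composes the helper objects used above
# (self.residual_block, self.attention_block_stage0, ...); the class and
# constructor names are assumptions for illustration.
import tensorflow as tf

net = ResidualAttentionNet(num_class=10, is_training=True)   # assumed ctor

# A 640x480 input (NHWC) gives the 80*60 and 40*30 feature maps noted above.
images = tf.placeholder(tf.float32, [None, 480, 640, 3], name='images')

decode1, decode2, logits = net.interface(images)

print(decode1.get_shape())   # (?, 240, 320, 1)  - 1/2-resolution side output
print(decode2.get_shape())   # (?, 120, 160, 1)  - 1/4-resolution side output
print(logits.get_shape())    # (?, 1, 1, 10)     - per-image class logits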