def atrous_conv_block(self, inputs, depths, stage, rate, is_training, s=2):
    """Residual bottleneck built from atrous convolutions with a projection shortcut.

    The main path is a 1x1 -> 3x3 -> 1x1 stack of ``atrous_conv2d`` layers,
    each followed by ``bn`` (and ``relu`` for the first two). The shortcut is
    a strided 1x1 ``conv2d`` + ``bn`` applied to ``inputs``. Both paths are
    summed and passed through ``relu``.

    Args:
        inputs: tensor fed to both the main path and the shortcut.
        depths: sequence of three output depths, one per main-path conv;
            the last one is also the shortcut depth.
        stage: string prefix used to name every layer of this block.
        rate: dilation rate forwarded to every ``atrous_conv2d`` call.
        is_training: flag forwarded to every ``bn`` call.
        s: spatial stride used by the shortcut convolution only.

    Returns:
        The ReLU-activated sum of the main path and the shortcut.

    NOTE(review): no stride is passed to the main path, so any ``s`` other
    than 1 presumably yields mismatching spatial sizes at the add --
    confirm against ``atrous_conv2d`` before relying on the default.
    """
    reduce_depth, mid_depth, out_depth = depths

    # Main path: reduce -> dilated 3x3 -> expand.
    residual = atrous_conv2d(inputs, reduce_depth, [1, 1], rate,
                             name=stage + '_a_conv')
    residual = relu(bn(residual, is_training))

    residual = atrous_conv2d(residual, mid_depth, [3, 3], rate,
                             name=stage + '_b_conv')
    residual = relu(bn(residual, is_training))

    residual = atrous_conv2d(residual, out_depth, [1, 1], rate,
                             name=stage + '_c_conv')
    residual = bn(residual, is_training)

    # Projection shortcut so the channel count matches the main path.
    projection = conv2d(inputs, out_depth, [1, 1],
                        name=stage + '_shortcut',
                        strides=[1, s, s, 1],
                        padding='VALID')
    projection = bn(projection, is_training)

    return relu(tf.add(residual, projection))
def atrous_identity_block(self, inputs, depths, stage, rate, is_training):
    """Residual bottleneck built from atrous convolutions with an identity shortcut.

    The main path is a 1x1 -> 3x3 -> 1x1 stack of ``atrous_conv2d`` layers,
    each followed by ``bn`` (and ``relu`` for the first two). ``inputs`` is
    added back unchanged, so ``depths[2]`` has to match the depth of
    ``inputs`` for the addition to be valid.

    Args:
        inputs: tensor fed to the main path and reused as the shortcut.
        depths: sequence of three output depths, one per main-path conv.
        stage: string prefix used to name every layer of this block.
        rate: dilation rate forwarded to every ``atrous_conv2d`` call.
        is_training: flag forwarded to every ``bn`` call.

    Returns:
        The ReLU-activated sum of the main path and ``inputs``.
    """
    reduce_depth, mid_depth, out_depth = depths

    residual = atrous_conv2d(inputs, reduce_depth, [1, 1], rate,
                             name=stage + '_a_identity')
    residual = relu(bn(residual, is_training))

    residual = atrous_conv2d(residual, mid_depth, [3, 3], rate,
                             name=stage + '_b_identity')
    residual = relu(bn(residual, is_training))

    residual = atrous_conv2d(residual, out_depth, [1, 1], rate,
                             name=stage + '_c_identity')
    residual = bn(residual, is_training)

    return relu(tf.add(residual, inputs))
def make_model(self, inputs, is_training):
    """Build a dilated ResNet-50 encoder followed by a summed atrous-pyramid head.

    Encoder: stem conv + max-pool, two stages of regular residual blocks,
    then two stages of atrous residual blocks (rates 2 and 4) that use
    stride 1 in their shortcut. Decoder: four parallel 3x3 atrous branches
    (rates 6, 12, 18, 24), each followed by two 1x1 convolutions; the
    branch outputs are summed and resized bilinearly.

    Args:
        inputs: image batch fed to the first convolution (presumably NHWC,
            given the 4-element strides -- confirm against ``conv2d``).
        is_training: flag forwarded to every batch-norm layer.

    Returns:
        Tuple ``(logits, pred)``. ``logits`` is the summed branch output
        resized to ``[self.RESIZE, self.RESIZE]`` with ``self.N_CLASS``
        channels; ``pred`` is the argmax of ``logits`` over axis 3 with
        that axis re-inserted as a size-1 dimension.
    """
    # Encoder: extract features with ResNet.
    with tf.variable_scope('ResNet50'):
        x = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='conv1')  # size 1/2
        x = bn(x, is_training)
        x = relu(x)
        x = max_pool(x, ksize=[1, 3, 3, 1], name='pool1')  # size 1/4

        x = self.conv_block(x, [64, 64, 256], '2_1', is_training, s=1)
        for idx in (2, 3):
            x = self.identity_block(x, [64, 64, 256], '2_%d' % idx, is_training)

        x = self.conv_block(x, [128, 128, 512], '3_1', is_training)
        for idx in (2, 3):
            x = self.identity_block(x, [128, 128, 512], '3_%d' % idx, is_training)

        # Stages 4 and 5 use dilation with a stride-1 shortcut.
        x = self.atrous_conv_block(x, [256, 256, 1024], '4_1', 2, is_training, s=1)
        for idx in range(2, 7):
            x = self.atrous_identity_block(
                x, [256, 256, 1024], '4_%d' % idx, 2, is_training)

        x = self.atrous_conv_block(x, [512, 512, 2048], '5_1', 4, is_training, s=1)
        for idx in (2, 3):
            x = self.atrous_identity_block(
                x, [512, 512, 2048], '5_%d' % idx, 4, is_training)

    # Decoder: atrous spatial pyramid pooling.
    # NOTE(review): the resize/argmax ops are kept inside the 'ASPP' scope;
    # confirm this matches the intended op naming.
    with tf.variable_scope('ASPP'):
        branches = []
        # Layer names ('rate6', 'rate6_conv1', ...) and creation order are
        # kept identical to the unrolled form so variable names do not change.
        for rate in (6, 12, 18, 24):
            prefix = 'rate%d' % rate
            branch = atrous_conv2d(x, self.N_CLASS, [3, 3], rate, name=prefix)
            branch = conv2d(branch, self.N_CLASS, [1, 1], name=prefix + '_conv1')
            branch = conv2d(branch, self.N_CLASS, [1, 1], name=prefix + '_conv2')
            branches.append(branch)

        add_aspp = tf.add_n(branches)
        logits = tf.image.resize_bilinear(add_aspp,
                                          size=[self.RESIZE, self.RESIZE])

        pred = tf.argmax(logits, axis=3)
        # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
        pred = tf.expand_dims(pred, axis=3)

        return logits, pred
def bottleneck(self, inputs, out_depth, f_h, f_w, is_training, keep_prob,
               dilated_rate=None, mode=None, scope=None):
    """Build one two-branch bottleneck module in the requested ``mode``.

    Every mode produces a main branch and a sub branch. The sub branch is
    passed through ``spatial_dropout`` + ``prelu``, added to the main
    branch, and the sum goes through a final ``prelu``. The sub branch
    works at a quarter of the input depth (``inputs`` depth // 4) between
    its first and last convolution.

    Modes:
        'downsampling': main = max-pool of ``inputs`` zero-padded along the
            channel axis; sub = stride-2 2x2 conv -> [f_h, f_w] conv -> 1x1 conv.
        'dilated': main = ``inputs``; sub = 1x1 conv -> [f_h, f_w] atrous
            conv with ``dilated_rate`` -> 1x1 conv.
        'asymmetric': main = ``inputs``; sub = 1x1 conv -> [f_h, 1] conv ->
            [1, f_w] conv -> 1x1 conv.
        'upsampling': main = 2x bilinear resize -> 3x3 conv; sub = 1x1 conv
            -> transposed conv to twice the spatial size -> 1x1 conv.
        'normal': main = ``inputs``; sub = 1x1 conv -> [f_h, f_w] conv -> 1x1 conv.

    Args:
        inputs: 4-D tensor whose static shape is read via ``get_shape()``;
            axis 3 is treated as the channel axis.
        out_depth: number of output channels of the module.
        f_h: kernel height of the middle convolution(s).
        f_w: kernel width of the middle convolution(s).
        is_training: flag forwarded to every ``bn`` call.
        keep_prob: value forwarded to ``spatial_dropout``.
        dilated_rate: dilation rate, used only in 'dilated' mode.
        mode: one of the mode strings listed above.
        scope: variable-scope name wrapping all layers of the module.

    Returns:
        The activated sum of the main and sub branches.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes
            (previously this silently returned ``None``).
    """
    supported_modes = ('downsampling', 'dilated', 'asymmetric',
                       'upsampling', 'normal')
    if mode not in supported_modes:
        raise ValueError(
            'Unknown bottleneck mode %r; expected one of %s'
            % (mode, ', '.join(supported_modes)))

    in_shape = inputs.get_shape().as_list()
    reduce_depth = in_shape[3] // 4

    def activate(tensor, name):
        # Shared batch-norm + PReLU tail applied after every convolution.
        return prelu(bn(tensor, is_training), name=name)

    with tf.variable_scope(scope):
        if mode == 'downsampling':
            main_branch = max_pool(inputs, name='_pool')
            # Zero-pad the pooled input along channels up to out_depth.
            # NOTE(review): abs() hides the out_depth < input-depth case,
            # where padding cannot make the branches match -- confirm that
            # callers never request that.
            depth_to_pad = abs(in_shape[3] - out_depth)
            paddings = tf.convert_to_tensor(
                [[0, 0], [0, 0], [0, 0], [0, depth_to_pad]])
            main_branch = tf.pad(main_branch, paddings=paddings,
                                 name='_main_padding')

            sub_branch = activate(
                conv2d(inputs, reduce_depth, [2, 2], name='_conv1',
                       strides=[1, 2, 2, 1]),
                'prelu_conv1')
            sub_branch = activate(
                conv2d(sub_branch, reduce_depth, [f_h, f_w], name='_conv2',
                       strides=[1, 1, 1, 1]),
                'prelu_conv2')
            sub_branch = activate(
                conv2d(sub_branch, out_depth, [1, 1], name='_conv3',
                       strides=[1, 1, 1, 1]),
                'prelu_conv3')

        elif mode == 'dilated':
            main_branch = inputs

            sub_branch = activate(
                conv2d(inputs, reduce_depth, [1, 1], name='_conv1'),
                'prelu_conv1')
            sub_branch = activate(
                atrous_conv2d(sub_branch, reduce_depth, [f_h, f_w],
                              dilated_rate, name='_conv2'),
                'prelu_conv2')
            # Fix: the expansion conv must consume the dilated features.
            # It used to read `inputs`, which discarded the two layers
            # above. This changes the '_conv3' kernel's input depth, so
            # checkpoints trained with the old wiring will not load for
            # this layer.
            sub_branch = activate(
                conv2d(sub_branch, out_depth, [1, 1], name='_conv3'),
                'prelu_conv3')

        elif mode == 'asymmetric':
            main_branch = inputs

            sub_branch = activate(
                conv2d(inputs, reduce_depth, [1, 1], name='_conv1'),
                'prelu_conv1')
            # [f_h, 1] followed by [1, f_w] factorises the 2-D kernel.
            sub_branch = activate(
                conv2d(sub_branch, reduce_depth, [f_h, 1], name='_conv2'),
                'prelu_conv2')
            sub_branch = activate(
                conv2d(sub_branch, reduce_depth, [1, f_w], name='_conv3'),
                'prelu_conv3')
            sub_branch = activate(
                conv2d(sub_branch, out_depth, [1, 1], name='_conv4'),
                'prelu_conv4')

        elif mode == 'upsampling':
            # Bilinear interpolation is used instead of the unpooling
            # described in the paper.
            upsampled_size = [in_shape[1] * 2, in_shape[2] * 2]
            main_branch = tf.image.resize_bilinear(inputs,
                                                   size=upsampled_size)
            # The PReLU names below are offset by one relative to the conv
            # names; they are kept as-is so variable names do not change.
            main_branch = activate(
                conv2d(main_branch, out_depth, [3, 3], name='_conv0'),
                'prelu_conv1')

            sub_branch = activate(
                conv2d(inputs, reduce_depth, [1, 1], name='_conv1'),
                'prelu_conv2')
            # NOTE(review): the output shape uses the static batch size, so
            # this presumably requires a fixed batch dimension -- confirm
            # against `conv2d_t`.
            sub_branch = activate(
                conv2d_t(sub_branch,
                         [in_shape[0], in_shape[1] * 2, in_shape[2] * 2,
                          reduce_depth],
                         [3, 3], name='_conv2'),
                'prelu_conv3')
            sub_branch = activate(
                conv2d(sub_branch, out_depth, [1, 1], name='_conv3'),
                'prelu_conv4')

        else:  # mode == 'normal'
            main_branch = inputs

            sub_branch = activate(
                conv2d(inputs, reduce_depth, [1, 1], name='_conv1',
                       strides=[1, 1, 1, 1]),
                'prelu_conv1')
            sub_branch = activate(
                conv2d(sub_branch, reduce_depth, [f_h, f_w], name='_conv2',
                       strides=[1, 1, 1, 1]),
                'prelu_conv2')
            sub_branch = activate(
                conv2d(sub_branch, out_depth, [1, 1], name='_conv3',
                       strides=[1, 1, 1, 1]),
                'prelu_conv3')

        # Tail shared by every mode: regularise the sub branch, then merge.
        sub_branch = prelu(spatial_dropout(sub_branch, keep_prob),
                           name='prelu_dropout')
        return prelu(tf.add(main_branch, sub_branch), name='prelu_add')
def make_model(self, inputs, is_training):
    """Build a dilated ResNet-50 encoder followed by a concatenating ASPP head.

    Encoder: stem conv + max-pool, two stages of regular residual blocks,
    then two stages of atrous residual blocks (rates 2 and 4) that use
    stride 1 in their shortcut. Decoder: a global-average-pooled branch, a
    1x1 conv branch and three 3x3 atrous branches (rates 6, 12, 18), all
    with 256 channels, concatenated along axis 3, fused by a 1x1 conv and
    projected to ``self.N_CLASS`` channels.

    Args:
        inputs: image batch fed to the first convolution (presumably NHWC,
            given the 4-element strides -- confirm against ``conv2d``).
        is_training: flag forwarded to every batch-norm layer.

    Returns:
        Tuple ``(logits, pred)``. ``logits`` is the class-score map resized
        bilinearly to ``[self.RESIZE, self.RESIZE]``; ``pred`` is the argmax
        of ``logits`` over axis 3 with that axis re-inserted as a size-1
        dimension.
    """
    # Encoder: extract features with ResNet.
    with tf.variable_scope('ResNet50'):
        x = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='conv1')  # size 1/2
        x = bn(x, is_training)
        x = relu(x)
        x = max_pool(x, ksize=[1, 3, 3, 1], name='pool1')  # size 1/4

        x = self.conv_block(x, [64, 64, 256], '2_1', is_training, s=1)
        x = self.identity_block(x, [64, 64, 256], '2_2', is_training)
        x = self.identity_block(x, [64, 64, 256], '2_3', is_training)

        x = self.conv_block(x, [128, 128, 512], '3_1', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_2', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_3', is_training)

        # Stages 4 and 5 use dilation with a stride-1 shortcut.
        x = self.atrous_conv_block(x, [256, 256, 1024], '4_1', 2, is_training, s=1)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_2', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_3', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_4', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_5', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_6', 2, is_training)

        x = self.atrous_conv_block(x, [512, 512, 2048], '5_1', 4, is_training, s=1)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_2', 4, is_training)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_3', 4, is_training)

    # Decoder: atrous spatial pyramid pooling.
    # NOTE(review): the logits/resize/argmax ops are kept inside the 'ASPP'
    # scope; confirm this matches the intended name of the 'out' op.
    with tf.variable_scope('ASPP'):
        # Static shape is needed to resize the pooled branch back.
        feature_map_shape = x.get_shape().as_list()

        # Global average pooling: average the feature map over its
        # height and width, then restore the spatial size.
        feature_map = tf.reduce_mean(x, [1, 2], keepdims=True)
        feature_map = conv2d(feature_map, 256, [1, 1], name='gap_feature_map')
        feature_map = tf.image.resize_bilinear(
            feature_map, [feature_map_shape[1], feature_map_shape[2]])

        rate1 = conv2d(x, 256, [1, 1], name='rate1')
        # Names ('rate6', 'rate12', 'rate18') and creation order match the
        # unrolled form so variable names do not change.
        atrous_branches = [
            atrous_conv2d(x, 256, [3, 3], rate=rate, name='rate%d' % rate)
            for rate in (6, 12, 18)
        ]

        concated = tf.concat([feature_map, rate1] + atrous_branches, axis=3)

        net = conv2d(concated, 256, [1, 1], name='net')
        logits = conv2d(net, self.N_CLASS, [1, 1], name='logits')
        logits = tf.image.resize_bilinear(logits,
                                          size=[self.RESIZE, self.RESIZE],
                                          name='out')

        pred = tf.argmax(logits, axis=3)
        # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
        pred = tf.expand_dims(pred, axis=3)

        return logits, pred