def middle_flow(self, data):
    """Build the "middle_flow" blocks on top of ``data`` and return the result.

    ``self.bottleneck_params["middle_flow"]`` is read as
    ``(block_num, strides, chns)``; strides and channels are normalised with
    ``check_data`` against ``block_num``.  ``self.stride`` and
    ``self.block_point`` are read on entry and written back on exit, and
    ``self.short_cuts`` is filled for every block point accepted by
    ``check_points(block_point, self.decode_points)``.

    Args:
        data: Input of the first block.

    Returns:
        Output of the last block.
    """
    flow_params = self.bottleneck_params["middle_flow"]
    block_num = flow_params[0]
    strides = check_data(flow_params[1], block_num)
    chns = check_data(flow_params[2], block_num)

    # Running state of the flow.
    s = self.stride
    block_point = self.block_point
    output_stride = self.output_stride

    with scope("middle_flow"):
        for i in range(block_num):
            block_point += 1
            with scope("block" + str(i + 1)):
                # Fall back to stride 1 when check_stride rejects the
                # accumulated stride for this block.
                if check_stride(s * strides[i], output_stride):
                    stride = strides[i]
                else:
                    stride = 1
                # Pass the clamped stride (not strides[i]) so the block that
                # is built matches the stride accounted for in `s`, as
                # entry_flow and exit_flow do.
                data, short_cuts = self.xception_block(
                    data, chns[i], [1, 1, stride], skip_conv=False)
                s = s * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]

    self.stride = s
    self.block_point = block_point
    return data
def _decoder_with_concat(encode_data, decode_shortcut, param_attr):
    """Merge encoder output with a backbone shortcut by concatenation.

    The shortcut goes through a 1x1 conv (48 filters) + ``bn_relu``; the
    encoder output is bilinearly resized to ``decode_shortcut.shape[2:]`` and
    both are concatenated on axis 1.  Two further layers follow: separable
    convs when ``cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV`` is truthy, plain
    3x3 conv + ``bn_relu`` otherwise, each with
    ``cfg.MODEL.DEEPLAB.DECODER.CONV_FILTERS`` filters.

    Args:
        encode_data: Encoder output.
        decode_shortcut: Shortcut branch taken from the backbone.
        param_attr: Parameter attribute handed to the plain ``conv`` calls.

    Returns:
        The decoded feature map.
    """
    with scope('concat'):
        projected = conv(
            decode_shortcut,
            48,
            1,
            1,
            groups=1,
            padding=0,
            param_attr=param_attr)
        decode_shortcut = bn_relu(projected)
        target_size = decode_shortcut.shape[2:]
        encode_data = fluid.layers.resize_bilinear(encode_data, target_size)
        encode_data = fluid.layers.concat(
            [encode_data, decode_shortcut], axis=1)

    if cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV:
        for scope_name in ("separable_conv1", "separable_conv2"):
            with scope(scope_name):
                encode_data = separate_conv(
                    encode_data,
                    cfg.MODEL.DEEPLAB.DECODER.CONV_FILTERS,
                    1,
                    3,
                    dilation=1,
                    act=relu)
        return encode_data

    for scope_name in ("decoder_conv1", "decoder_conv2"):
        with scope(scope_name):
            conv_out = conv(
                encode_data,
                cfg.MODEL.DEEPLAB.DECODER.CONV_FILTERS,
                stride=1,
                filter_size=3,
                dilation=1,
                padding=1,
                param_attr=param_attr)
            encode_data = bn_relu(conv_out)
    return encode_data
def xception_block(self,
                   input,
                   channels,
                   strides=1,
                   filters=3,
                   dilation=1,
                   skip_conv=True,
                   has_skip=True,
                   activation_fn_in_separable_conv=False):
    """Stack three separable convolutions, optionally adding a skip branch.

    ``channels``, ``filters`` and ``strides`` are each normalised with
    ``check_data`` against the repeat count (3).

    Args:
        input: Block input.
        channels: Output channels of the separable convs.
        strides: Strides of the separable convs.
        filters: Filter sizes of the separable convs.
        dilation: Dilation forwarded to every separable conv.
        skip_conv: If true, the skip branch is a 1x1 conv + ``bn`` of
            ``input``; otherwise ``input`` itself is used.
        has_skip: If false, no skip branch is added.
        activation_fn_in_separable_conv: If true, ``relu`` is passed to
            ``separate_conv`` as ``act``; otherwise ``relu`` is applied to
            the data before each separable conv.

    Returns:
        ``(output, results)`` where ``results`` holds the output of each of
        the three separable convs and ``output`` is the last of them, plus
        the skip branch when ``has_skip`` is true.
    """
    repeat_number = 3
    channels = check_data(channels, repeat_number)
    filters = check_data(filters, repeat_number)
    strides = check_data(strides, repeat_number)

    results = []
    data = input
    for idx in range(repeat_number):
        with scope('separable_conv' + str(idx + 1)):
            if activation_fn_in_separable_conv:
                data = separate_conv(
                    data,
                    channels[idx],
                    strides[idx],
                    filters[idx],
                    dilation=dilation,
                    act=relu)
            else:
                data = relu(data)
                data = separate_conv(
                    data,
                    channels[idx],
                    strides[idx],
                    filters[idx],
                    dilation=dilation)
            results.append(data)

    if not has_skip:
        return data, results

    if not skip_conv:
        skip = input
    else:
        # The parameter name is taken from name_scope before the 'shortcut'
        # scope is entered.
        param_attr = fluid.ParamAttr(
            name=name_scope + 'weights',
            regularizer=None,
            initializer=fluid.initializer.TruncatedNormal(
                loc=0.0, scale=0.09))
        with scope('shortcut'):
            shortcut = conv(
                input,
                channels[-1],
                1,
                strides[-1],
                groups=1,
                padding=0,
                param_attr=param_attr)
            skip = bn(shortcut)
    return data + skip, results
def entry_flow(self, data):
    """Build the stem convolutions and the "entry_flow" blocks.

    Two 3x3 conv + ``bn_relu`` layers (32 filters with stride 2, then 64
    filters with stride 1) are followed by the blocks described by
    ``self.bottleneck_params["entry_flow"]``, read as
    ``(block_num, strides, chns)``.  ``self.stride`` and ``self.block_point``
    are read on entry and written back on exit; ``self.short_cuts`` is filled
    for block points accepted by ``check_points``.

    Args:
        data: Network input.

    Returns:
        Output of the last entry-flow block.
    """
    # The parameter name is taken from name_scope before any scope below is
    # entered.
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=None,
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09))
    with scope("entry_flow"):
        with scope("conv1"):
            stem = conv(data, 32, 3, stride=2, padding=1, param_attr=param_attr)
            data = bn_relu(stem)
        with scope("conv2"):
            stem = conv(data, 64, 3, stride=1, padding=1, param_attr=param_attr)
            data = bn_relu(stem)

    # Entry-flow parameters.
    flow_params = self.bottleneck_params["entry_flow"]
    block_num = flow_params[0]
    strides = check_data(flow_params[1], block_num)
    chns = check_data(flow_params[2], block_num)

    # Running state of the flow.
    total_stride = self.stride
    block_point = self.block_point
    output_stride = self.output_stride

    with scope("entry_flow"):
        for idx in range(block_num):
            block_point += 1
            with scope("block" + str(idx + 1)):
                # Fall back to stride 1 when check_stride rejects the
                # accumulated stride for this block.
                if check_stride(total_stride * strides[idx], output_stride):
                    stride = strides[idx]
                else:
                    stride = 1
                data, short_cuts = self.xception_block(
                    data, chns[idx], [1, 1, stride])
                total_stride = total_stride * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]

    self.stride = total_stride
    self.block_point = block_point
    return data
def decode(data, short_cuts):
    """Run the four decoder stages, consuming ``short_cuts`` from index 3 to 0.

    Args:
        data: Input of the first decoder stage.
        short_cuts: Indexable collection; entries 3, 2, 1 and 0 are passed to
            ``up`` in that order.

    Returns:
        Output of the last decoder stage.
    """
    # Decoder setup, symmetric to the encoder:
    # (scope name, shortcut index, output channels).
    stages = (
        ("decode1", 3, 256),
        ("decode2", 2, 128),
        ("decode3", 1, 64),
        ("decode4", 0, 64),
    )
    with scope("decode"):
        for scope_name, shortcut_idx, out_ch in stages:
            with scope(scope_name):
                data = up(data, short_cuts[shortcut_idx], out_ch)
    return data
def double_conv(data, out_ch):
    """Apply two 3x3 conv + ``bn_relu`` layers with ``out_ch`` filters each.

    Both convolutions use stride 1, padding 1 and the same parameter
    attribute (name ``'weights'``, L2 decay with coefficient 0.0,
    truncated-normal initialiser with scale 0.33), under the scopes
    ``"conv0"`` and ``"conv1"``.

    Args:
        data: Input feature map.
        out_ch: Number of filters of both convolutions.

    Returns:
        Output of the second conv + ``bn_relu``.
    """
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)
    param_attr = fluid.ParamAttr(
        name='weights', regularizer=weight_decay, initializer=weight_init)

    for scope_name in ("conv0", "conv1"):
        with scope(scope_name):
            conv_out = conv(
                data, out_ch, 3, stride=1, padding=1, param_attr=param_attr)
            data = bn_relu(conv_out)
    return data
def sub_net_1(input):
    """Build three stride-2 conv + bn(relu) layers and a 1x1 projection.

    The three 3x3 convolutions use 32, 32 and 64 filters; the projection is a
    1x1 conv with 128 filters followed by ``bn`` without activation.

    Args:
        input: Input of the first convolution.

    Returns:
        Output of the projection's ``bn``.
    """
    # (scope name, number of filters) of the three downsampling convs.
    down_convs = (("conv1_sub1", 32), ("conv2_sub1", 32), ("conv3_sub1", 64))

    tmp = input
    for scope_name, num_filters in down_convs:
        with scope(scope_name):
            tmp = bn(conv(tmp, num_filters, 3, 2, padding=1), act='relu')

    with scope("conv3_sub1_proj"):
        tmp = bn(conv(tmp, 128, 1, 1))
    return tmp
def sub_net_4(input, input_shape):
    """Pyramid-pool ``input``, apply a 1x1 conv + bn(relu), then interpolate.

    Args:
        input: Feature map handed to ``pyramis_pooling``.
        input_shape: Value that supports division by 16; also forwarded to
            ``pyramis_pooling``.  Presumably a numpy array holding the input
            size -- TODO confirm against the caller.

    Returns:
        Result of ``interp`` with ``out_shape=np.ceil(input_shape / 16)``.
    """
    pooled = pyramis_pooling(input, input_shape)
    with scope("conv5_4_k1"):
        reduced = conv(pooled, 256, 1, 1)
        reduced = bn(reduced, act='relu')
    target_shape = np.ceil(input_shape / 16)
    return interp(reduced, out_shape=target_shape)
def psp_module(input, out_features):
    """Concatenate ``input`` with four pooled-and-resized copies of itself.

    For each size in (1, 2, 3, 6) the input is adaptively average-pooled to
    ``size x size``, passed through a bias-free 1x1 conv with
    ``out_features`` filters and ``bn`` with relu, then bilinearly resized
    (``align_mode=0``) to ``input.shape[2:]``.

    Args:
        input: Feature map to pool.
        out_features: Number of filters of every 1x1 conv.

    Returns:
        Concatenation on axis 1 of ``input`` and the four branches, in
        size order.
    """
    branches = [input]
    for size in (1, 2, 3, 6):
        psp_name = "psp" + str(size)
        with scope(psp_name):
            pooled = fluid.layers.adaptive_pool2d(
                input,
                pool_size=[size, size],
                pool_type='avg',
                name=psp_name + '_adapool')
            reduced = conv(
                pooled,
                out_features,
                filter_size=1,
                bias_attr=False,
                name=psp_name + '_conv')
            activated = bn(reduced, act='relu')
            resized = fluid.layers.resize_bilinear(
                activated,
                out_shape=input.shape[2:],
                name=psp_name + '_interp',
                align_mode=0)
        branches.append(resized)
    return fluid.layers.concat(branches, axis=1, name='psp_cat')
def _decoder_with_sum_merge(encode_data, decode_shortcut, param_attr):
    """Merge encoder output with a backbone shortcut by element-wise sum.

    The encoder output is bilinearly resized to ``decode_shortcut.shape[2:]``
    and passed through a 1x1 conv with 256 filters; the shortcut goes through
    its own 1x1 conv with 256 filters under the ``'merge'`` scope.

    Args:
        encode_data: Encoder output.
        decode_shortcut: Shortcut branch taken from the backbone.
        param_attr: Parameter attribute handed to both ``conv`` calls.

    Returns:
        Sum of the two projected feature maps.
    """
    target_size = decode_shortcut.shape[2:]
    resized = fluid.layers.resize_bilinear(encode_data, target_size)
    # The filter count is fixed at 256 here rather than read from
    # cfg.MODEL.DEEPLAB.DECODER.CONV_FILTERS.
    encode_proj = conv(
        resized, 256, 1, 1, groups=1, padding=0, param_attr=param_attr)
    with scope('merge'):
        shortcut_proj = conv(
            decode_shortcut,
            256,
            1,
            1,
            groups=1,
            padding=0,
            param_attr=param_attr)
    return encode_proj + shortcut_proj
def deeplabv3p_nas(img, num_classes, arch=None):
    """Build a DeepLabv3+ style network on top of ``nas_backbone``.

    Sets ``cfg.MODEL.DEFAULT_EPSILON`` to ``1e-5`` as a side effect.

    Args:
        img: Input image; ``img.shape[2:]`` is the size the logits are
            resized to.
        num_classes: Number of filters of the final 1x1 convolution.
        arch: Forwarded to ``nas_backbone``.

    Returns:
        The logits, bilinearly resized to ``img.shape[2:]``.
    """
    data, decode_shortcut = nas_backbone(img, arch)

    # Encoder / decoder setup.
    cfg.MODEL.DEFAULT_EPSILON = 1e-5
    deeplab_cfg = cfg.MODEL.DEEPLAB
    if deeplab_cfg.ENCODER_WITH_ASPP:
        data = encoder(data)
    if deeplab_cfg.ENABLE_DECODER:
        data = decoder(data, decode_shortcut)

    # The last convolution outputs one channel per class; the result is then
    # resized back to the original image size.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)
    with scope('logit'):
        logit = conv(
            data,
            num_classes,
            1,
            stride=1,
            padding=0,
            bias_attr=True,
            param_attr=param_attr)
        logit = fluid.layers.resize_bilinear(logit, img.shape[2:])
    return logit
def deeplabv3p(img, num_classes):
    """Build DeepLabv3+ with the backbone named in the configuration.

    Sets ``cfg.MODEL.DEFAULT_EPSILON`` to ``1e-5`` as a side effect.

    Args:
        img: Input image; ``img.shape[2:]`` is the size the logits are
            resized to.
        num_classes: Number of filters of the final 1x1 convolution.

    Returns:
        The logits, bilinearly resized to ``img.shape[2:]``.

    Raises:
        Exception: If ``cfg.MODEL.DEEPLAB.BACKBONE`` contains neither
            ``'xception'`` nor ``'mobilenet'``.
    """
    # Backbone selection: xception or mobilenetv2.
    backbone_name = cfg.MODEL.DEEPLAB.BACKBONE
    if 'xception' in backbone_name:
        backbone = xception
    elif 'mobilenet' in backbone_name:
        backbone = mobilenetv2
    else:
        raise Exception("deeplab only support xception and mobilenet backbone")
    data, decode_shortcut = backbone(img)

    # Encoder / decoder setup.
    cfg.MODEL.DEFAULT_EPSILON = 1e-5
    if cfg.MODEL.DEEPLAB.ENCODER_WITH_ASPP:
        data = encoder(data)
    if cfg.MODEL.DEEPLAB.ENABLE_DECODER:
        data = decoder(data, decode_shortcut)

    # The last convolution outputs one channel per class; the result is then
    # resized back to the original image size.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)
    with scope('logit'):
        with fluid.name_scope('last_conv'):
            logit = conv(
                data,
                num_classes,
                1,
                stride=1,
                padding=0,
                bias_attr=True,
                param_attr=param_attr)
        logit = fluid.layers.resize_bilinear(logit, img.shape[2:])
    return logit
def deeplabv3p(img, num_classes):
    """Build DeepLabv3+ on top of the ``resnet_vd`` backbone.

    Args:
        img: Input image; ``img.shape[2:]`` is the size the logits are
            resized to.
        num_classes: Number of filters of the final 1x1 convolution.

    Returns:
        The logits, bilinearly resized to ``img.shape[2:]``.
    """
    # Backbone: resnet_vd.
    data, decode_shortcut = resnet_vd(img)

    # Encoder / decoder setup.
    # NOTE(review): the cfg switches (DEFAULT_EPSILON, ENCODER_WITH_ASPP,
    # ENABLE_DECODER, DECODER.OUTPUT_IS_LOGITS) are disabled in this variant,
    # so the encoder, the decoder and the logit convolution are taken to
    # always run -- confirm.
    data = encoder(data)
    data = decoder(data, decode_shortcut)

    # The last convolution outputs one channel per class; the result is then
    # resized back to the original image size.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)
    with scope('logit'):
        with fluid.name_scope('last_conv'):
            logit = conv(
                data,
                num_classes,
                1,
                stride=1,
                padding=0,
                bias_attr=True,
                param_attr=param_attr)
    return fluid.layers.resize_bilinear(logit, img.shape[2:])
def exit_flow(self, data):
    """Build the two "exit_flow" blocks on top of ``data``.

    ``self.bottleneck_params["exit_flow"]`` is read as
    ``(block_num, strides, chns)`` and ``block_num`` is asserted to be 2.
    The second block is built with ``dilation=2``, without a skip branch and
    with the activation inside the separable convs.  ``self.stride`` and
    ``self.block_point`` are read on entry and written back on exit;
    ``self.short_cuts`` is filled for block points accepted by
    ``check_points``.

    Args:
        data: Input of the first block.

    Returns:
        Output of the second block.
    """
    flow_params = self.bottleneck_params["exit_flow"]
    block_num = flow_params[0]
    strides = check_data(flow_params[1], block_num)
    chns = check_data(flow_params[2], block_num)
    assert (block_num == 2)

    # Running state of the flow.
    total_stride = self.stride
    block_point = self.block_point
    output_stride = self.output_stride

    # Extra keyword arguments of xception_block for each of the two blocks.
    blocks = (
        ('block1', {}),
        ('block2', {
            'dilation': 2,
            'has_skip': False,
            'activation_fn_in_separable_conv': True,
        }),
    )

    with scope("exit_flow"):
        for idx, (scope_name, block_kwargs) in enumerate(blocks):
            with scope(scope_name):
                block_point += 1
                # Fall back to stride 1 when check_stride rejects the
                # accumulated stride for this block.
                if check_stride(total_stride * strides[idx], output_stride):
                    stride = strides[idx]
                else:
                    stride = 1
                data, short_cuts = self.xception_block(
                    data, chns[idx], [1, 1, stride], **block_kwargs)
                total_stride = total_stride * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]

    self.stride = total_stride
    self.block_point = block_point
    return data
def net(self, x):
    """Apply two separable convs, dropout and the final 1x1 convolution.

    Both separable convs use ``self.dw_channels`` filters, stride
    ``self.stride``, filter size 3 and relu.  ``dropout2d`` is called with
    rate 0.1 and ``is_train`` set to ``cfg.PHASE == 'train'``.

    Args:
        x: Input feature map.

    Returns:
        Output of the 1x1 conv with ``self.num_classes`` filters and bias.
    """
    for scope_name in ('dsconv1', 'dsconv2'):
        with scope(scope_name):
            x = separate_conv(
                x,
                self.dw_channels,
                stride=self.stride,
                filter=3,
                act=fluid.layers.relu)
    training = cfg.PHASE == 'train'
    x = dropout2d(x, 0.1, is_train=training)
    return conv(x, self.num_classes, 1, bias_attr=True)
def CCF24(sub2_out, sub4_out, input_shape):
    """Fuse ``sub4_out`` into ``sub2_out`` and interpolate the result.

    ``sub4_out`` goes through a dilated 3x3 conv (128 filters, dilation 2,
    padding 2) and ``bn``; the result is added to ``sub2_out`` and passed
    through relu.

    Args:
        sub2_out: Feature map added to the convolved ``sub4_out``.
        sub4_out: Feature map that is convolved first.
        input_shape: Value that supports division by 8.  Presumably a numpy
            array holding the input size -- TODO confirm against the caller.

    Returns:
        Result of ``interp`` with target ``np.ceil(input_shape / 8)``.
    """
    with scope("conv_sub4"):
        branch = conv(sub4_out, 128, 3, dilation=2, padding=2)
        branch = bn(branch)
    fused = fluid.layers.relu(branch + sub2_out)
    return interp(fused, np.ceil(input_shape / 8))
def learning_to_downsample(x, dw_channels1=32, dw_channels2=48,
                           out_channels=64):
    """Apply one stride-2 convolution and two stride-2 separable convs.

    Args:
        x: Input.
        dw_channels1: Filters of the first 3x3 conv (followed by bn + relu).
        dw_channels2: Filters of the first separable conv.
        out_channels: Filters of the second separable conv.

    Returns:
        Output of the second separable conv.
    """
    stem = conv(x, dw_channels1, 3, 2)
    x = relu(bn(stem))
    # (scope name, number of filters) of the two separable convs.
    for scope_name, num_filters in (('dsconv1', dw_channels2),
                                    ('dsconv2', out_channels)):
        with scope(scope_name):
            x = separate_conv(
                x, num_filters, stride=2, filter=3, act=fluid.layers.relu)
    return x
def CCF124(sub1_out, sub24_out, input_shape):
    """Fuse ``sub24_out`` into ``sub1_out`` and interpolate the result.

    ``sub24_out`` is zero-padded by 2, passed through a dilated 3x3 conv
    (128 filters, dilation 2) and ``bn``; the result is added to
    ``sub1_out`` and passed through relu.

    Args:
        sub1_out: Feature map added to the convolved ``sub24_out``.
        sub24_out: Feature map that is padded and convolved first.
        input_shape: Value that supports floor division by 4.

    Returns:
        Result of ``interp`` with target ``input_shape // 4``.
    """
    padded = zero_padding(sub24_out, padding=2)
    with scope("conv_sub2"):
        branch = conv(padded, 128, 3, dilation=2)
        branch = bn(branch)
    fused = fluid.layers.relu(branch + sub1_out)
    return interp(fused, input_shape // 4)
def net(self, higher_res_feature, lower_res_feature):
    """Fuse a higher- and a lower-resolution feature map by summation.

    The lower-resolution map is bilinearly resized (``align_mode=0``) to
    ``higher_res_feature.shape[2:]``, then passed through a 1x1 conv + bn +
    relu and a 1x1 conv (with bias) + bn.  The higher-resolution map goes
    through a 1x1 conv (with bias) + bn.  All convs use
    ``self.out_channels`` filters.

    Args:
        higher_res_feature: Feature map that defines the target size.
        lower_res_feature: Feature map that is resized to that size.

    Returns:
        relu of the sum of the two processed feature maps.
    """
    height, width = higher_res_feature.shape[2:]
    low = fluid.layers.resize_bilinear(
        lower_res_feature, [height, width], align_mode=0)

    with scope('dwconv'):
        low = relu(bn(conv(low, self.out_channels, 1)))
    with scope('conv_lower_res'):
        low = bn(conv(low, self.out_channels, 1, bias_attr=True))
    with scope('conv_higher_res'):
        high = bn(
            conv(higher_res_feature, self.out_channels, 1, bias_attr=True))
    return relu(high + low)
def get_logit(data, num_classes):
    """Apply the final 3x3 convolution with one output channel per class.

    Args:
        data: Input feature map.
        num_classes: Number of filters of the convolution.

    Returns:
        Output of the convolution, built under the ``"logit"`` scope.
    """
    # The last convolution outputs one channel per class.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name='weights', regularizer=weight_decay, initializer=weight_init)
    with scope("logit"):
        logit = conv(
            data, num_classes, 3, stride=1, padding=1, param_attr=param_attr)
    return logit
def psp_module(input, out_features):
    """Pyramid Scene Parsing pooling module.

    For each size in (1, 2, 3, 6) the input is adaptively average-pooled to
    ``size x size``, passed through a 1x1 conv (with bias) with
    ``out_features`` filters and ``bn`` with relu, then bilinearly resized to
    ``input.shape[2:]``.  ``input`` and the branches, in reverse size order,
    are concatenated on axis 1 and passed through a final 3x3 conv + ``bn``
    with relu.

    Args:
        input: Features produced by the backbone.
        out_features: Number of filters of every convolution in the module.

    Returns:
        Output of the final conv + bn.
    """
    pyramid = []
    for size in (1, 2, 3, 6):
        psp_name = "psp" + str(size)
        with scope(psp_name):
            pooled = fluid.layers.adaptive_pool2d(
                input,
                pool_size=[size, size],
                pool_type='avg',
                name=psp_name + '_adapool')
            reduced = conv(
                pooled,
                out_features,
                filter_size=1,
                bias_attr=True,
                name=psp_name + '_conv')
            activated = bn(reduced, act='relu')
            resized = fluid.layers.resize_bilinear(
                activated,
                out_shape=input.shape[2:],
                name=psp_name + '_interp')
        pyramid.append(resized)

    # The input comes first, followed by the branches from the largest pool
    # size to the smallest.
    branches = [input] + list(reversed(pyramid))
    cat = fluid.layers.concat(branches, axis=1, name='psp_cat')

    psp_end_name = "psp_end"
    with scope(psp_end_name):
        fused = conv(
            cat,
            out_features,
            filter_size=3,
            padding=1,
            bias_attr=True,
            name=psp_end_name)
        out = bn(fused, act='relu')
    return out
def net(self,
        input,
        output_stride=32,
        num_classes=1000,
        end_points=None,
        decode_points=None):
    """Build the network: entry, middle and exit flow plus a softmax head.

    Resets ``self.stride`` (2), ``self.block_point`` (0) and
    ``self.short_cuts`` (empty dict), and stores ``output_stride`` and
    ``decode_points`` on the instance before building.

    Args:
        input: Network input.
        output_stride: Stored as ``self.output_stride``.
        num_classes: Size of the final fully connected layer.
        end_points: Passed to ``check_points`` after each flow; when the
            check succeeds the network is cut there.
        decode_points: Stored as ``self.decode_points``.

    Returns:
        ``(data, self.short_cuts)`` when the network is cut at an end point,
        otherwise the output of the softmax fully connected layer.
    """
    self.stride = 2
    self.block_point = 0
    self.output_stride = output_stride
    self.decode_points = decode_points
    self.short_cuts = {}

    data = input
    with scope(self.backbone):
        # Entry flow, middle flow, exit flow -- stop after the first one
        # whose block point is an end point.
        for flow in (self.entry_flow, self.middle_flow, self.exit_flow):
            data = flow(data)
            if check_points(self.block_point, end_points):
                return data, self.short_cuts

    data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True)
    data = fluid.layers.dropout(data, 0.5)
    stdv = 1.0 / math.sqrt(data.shape[1] * 1.0)
    with scope("logit"):
        weight_attr = fluid.param_attr.ParamAttr(
            name='weights',
            initializer=fluid.initializer.Uniform(-stdv, stdv))
        bias_attr = fluid.param_attr.ParamAttr(name='bias')
        out = fluid.layers.fc(
            input=data,
            size=num_classes,
            act='softmax',
            param_attr=weight_attr,
            bias_attr=bias_attr)
    return out
def decoder(encode_data, decode_shortcut):
    """Decoder: merge the encoder output with a backbone shortcut.

    The shortcut goes through a 1x1 conv (48 filters) + ``bn_relu``; the
    encoder output is bilinearly resized to ``decode_shortcut.shape[2:]`` and
    both are concatenated on axis 1.  Two 256-filter layers follow:
    separable convs when ``cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV`` is
    truthy, plain 3x3 conv + ``bn_relu`` otherwise.

    Args:
        encode_data: Encoder output.
        decode_shortcut: Branch taken from the backbone.

    Returns:
        The decoded feature map.
    """
    # The parameter name is taken from name_scope before the 'decoder' scope
    # is entered.
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=None,
        initializer=weight_init)

    with scope('decoder'):
        with scope('concat'):
            projected = conv(
                decode_shortcut,
                48,
                1,
                1,
                groups=1,
                padding=0,
                param_attr=param_attr)
            decode_shortcut = bn_relu(projected)
            target_size = decode_shortcut.shape[2:]
            encode_data = fluid.layers.resize_bilinear(encode_data,
                                                       target_size)
            encode_data = fluid.layers.concat(
                [encode_data, decode_shortcut], axis=1)

        if cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV:
            for scope_name in ("separable_conv1", "separable_conv2"):
                with scope(scope_name):
                    encode_data = separate_conv(
                        encode_data, 256, 1, 3, dilation=1, act=relu)
        else:
            for scope_name in ("decoder_conv1", "decoder_conv2"):
                with scope(scope_name):
                    conv_out = conv(
                        encode_data,
                        256,
                        stride=1,
                        filter_size=3,
                        dilation=1,
                        padding=1,
                        param_attr=param_attr)
                    encode_data = bn_relu(conv_out)
    return encode_data
def net(self, x):
    """Apply three inverted-block stages, the PSP module and a 1x1 conv.

    The stages use strides 2, 2 and 1; stage ``k`` outputs
    ``self.block_channels[k]`` channels and repeats ``self.num_blocks[k]``
    times, with ``self.t`` forwarded to every stage.

    Args:
        x: Input feature map with ``self.in_channels`` channels.

    Returns:
        Output of the final conv (``self.out_channels`` filters) + bn + relu.
    """
    # (scope name, stride) of the three stages.
    stages = (
        ('inverted_block_1', 2),
        ('inverted_block_2', 2),
        ('inverted_block_3', 1),
    )
    in_channels = self.in_channels
    for idx, (stage_name, stride) in enumerate(stages):
        out_channels = self.block_channels[idx]
        x, _ = inverted_blocks(x, in_channels, self.t, out_channels,
                               self.num_blocks[idx], stride, stage_name)
        # Each stage consumes the channels produced by the previous one.
        in_channels = out_channels

    x = psp_module(x, self.block_channels[2] // 4)
    with scope('out'):
        x = relu(bn(conv(x, self.out_channels, 1)))
    return x
def decoder(encode_data, decode_shortcut):
    """Decoder: build the concat decoder under the ``'decoder'`` scope.

    Args:
        encode_data: Encoder output.
        decode_shortcut: Branch taken from the backbone.

    Returns:
        Result of ``_decoder_with_concat``.
    """
    # The parameter name is taken from name_scope before the 'decoder' scope
    # is entered.
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=None,
        initializer=weight_init)
    with scope('decoder'):
        # The sum-merge variant (cfg.MODEL.DEEPLAB.DECODER.USE_SUM_MERGE) is
        # disabled; the concat decoder is always used.
        decoded = _decoder_with_concat(encode_data, decode_shortcut,
                                       param_attr)
        return decoded
def get_logit(data, num_classes, name="logit"):
    """Apply a 1x1 convolution (with bias) with one channel per class.

    Args:
        data: Input feature map.
        num_classes: Number of filters of the convolution.
        name: Scope name; also the prefix of the weight parameter name
            (``name + 'weights'``).

    Returns:
        Output of the convolution.
    """
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name=name + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)
    with scope(name):
        logit = conv(
            data,
            num_classes,
            1,
            stride=1,
            padding=0,
            param_attr=param_attr,
            bias_attr=True)
    return logit
def get_logit_interp(input, num_classes, out_shape, name="logit"):
    """Apply the final 1x1 convolution and resize the logits.

    Args:
        input: Input feature map.
        num_classes: Number of filters of the convolution.
        out_shape: Target size handed to ``resize_bilinear``.
        name: Scope name and prefix of the layer and parameter names.

    Returns:
        The logits, bilinearly resized to ``out_shape``.
    """
    # The last convolution outputs one channel per class; the result is then
    # interpolated back to the requested size.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    param_attr = fluid.ParamAttr(
        name=name + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)
    with scope(name):
        logit = conv(
            input,
            num_classes,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=True,
            name=name + '_conv')
        logit_interp = fluid.layers.resize_bilinear(
            logit, out_shape=out_shape, name=name + '_interp')
    return logit_interp
def up(data, short_cut, out_ch):
    """Upsample ``data``, concatenate it with ``short_cut`` and convolve.

    Args:
        data: Feature map to upsample.
        short_cut: Feature map to concatenate with; in bilinear mode its
            ``shape[2:]`` is the resize target.
        out_ch: Output channels of the trailing ``double_conv``; the deconv
            path uses ``out_ch // 2`` filters.

    Returns:
        Output of ``double_conv`` on the concatenated features.
    """
    # Upsampling: data is upsampled (resize or deconv) and then concatenated
    # with short_cut.
    param_attr = fluid.ParamAttr(
        name='weights',
        regularizer=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.0),
        initializer=fluid.initializer.XavierInitializer(),
    )
    with scope("up"):
        use_deconv = cfg.MODEL.UNET.UPSAMPLE_MODE != 'bilinear'
        if use_deconv:
            data = deconv(
                data,
                out_ch // 2,
                filter_size=2,
                stride=2,
                padding=0,
                param_attr=param_attr)
        else:
            data = fluid.layers.resize_bilinear(data, short_cut.shape[2:])
        merged = fluid.layers.concat([data, short_cut], axis=1)
        data = double_conv(merged, out_ch)
    return data
def fast_scnn(img, num_classes):
    """Build Fast-SCNN and, depending on the config, auxiliary logits.

    Every logit is bilinearly resized (``align_mode=0``) to
    ``img.shape[2:]``.

    Args:
        img: Input image.
        num_classes: Number of classes handed to the classifier and to the
            auxiliary layers.

    Returns:
        ``(logit, higher_logit, lower_logit)`` when
        ``cfg.MODEL.MULTI_LOSS_WEIGHT`` has three entries,
        ``(logit, higher_logit)`` when it has two, otherwise ``logit`` alone.
    """
    size = img.shape[2:]
    classifier = Classifier(128, num_classes)
    global_feature_extractor = GlobalFeatureExtractor(
        64, [64, 96, 128], 128, 6, [3, 3, 3])
    feature_fusion = FeatureFusionModule(64, 128, 128)

    with scope('learning_to_downsample'):
        higher_res_features = learning_to_downsample(img, 32, 48, 64)
    with scope('global_feature_extractor'):
        lower_res_feature = global_feature_extractor.net(higher_res_features)
    with scope('feature_fusion'):
        fused = feature_fusion.net(higher_res_features, lower_res_feature)
    with scope('classifier'):
        logit = classifier.net(fused)
        logit = fluid.layers.resize_bilinear(logit, size, align_mode=0)

    num_losses = len(cfg.MODEL.MULTI_LOSS_WEIGHT)
    if num_losses not in (2, 3):
        return logit

    # Both multi-loss configurations use the higher-resolution head.
    with scope('aux_layer_higher'):
        higher_logit = aux_layer(higher_res_features, num_classes)
        higher_logit = fluid.layers.resize_bilinear(
            higher_logit, size, align_mode=0)
    if num_losses == 2:
        return logit, higher_logit

    with scope('aux_layer_lower'):
        lower_logit = aux_layer(lower_res_feature, num_classes)
        lower_logit = fluid.layers.resize_bilinear(
            lower_logit, size, align_mode=0)
    return logit, higher_logit, lower_logit
def encode(data):
    """Run the five encoder blocks and collect the first four outputs.

    Block 1 is a ``double_conv`` with 64 channels; blocks 2 to 5 call
    ``down`` with 128, 256, 512 and 512 channels.

    Args:
        data: Encoder input.

    Returns:
        ``(data, short_cuts)``: the output of block 5 and the list of the
        outputs of blocks 1 to 4, in that order.
    """
    # Encoder setup.
    short_cuts = []
    with scope("encode"):
        with scope("block1"):
            data = double_conv(data, 64)
            short_cuts.append(data)
        # (scope name, output channels) of the blocks that keep a shortcut.
        for scope_name, out_ch in (("block2", 128), ("block3", 256),
                                   ("block4", 512)):
            with scope(scope_name):
                data = down(data, out_ch)
                short_cuts.append(data)
        with scope("block5"):
            data = down(data, 512)
    return data, short_cuts