def gru_module(x, num_state, num_node):
    '''
    Global Reasoning Unit: projection --> graph reasoning --> reverse projection
    Params:
        x: input feature of shape B x C x H x W
        num_state: the dimension of each vertex feature
        num_node: the number of vertices
    Output:
        feature of shape B x C x H x W
    Feature transforms (C1 = num_state, N = num_node, L = H*W):
        projection:         B, C, H, W --> B, N, H, W --> B, N, L
        reduction:          B, C, H, W --> B, C1, H, W --> B, C1, L --> B, L, C1
        graph reasoning:    B, N, L  x  B, L, C1 --> B, N, C1 --> B, C1, N
        reverse projection: B, C1, N  x  B, N, L --> B, C1, L --> B, C1, H, W --> B, C, H, W
    '''
    # Generate the projection matrix B
    num_batch, C, H, W = x.shape
    with scope('projection'):
        B = conv(x, num_node, filter_size=1, bias_attr=True,
                 name='projection_conv')  # num_batch, num_node, H, W
        B = fluid.layers.reshape(
            B, shape=[num_batch, num_node, H * W])  # num_batch, num_node, L

    # Reduce the channel dimension
    with scope('reduce_channel'):
        x_reduce = conv(x, num_state, filter_size=1, bias_attr=True,
                        name='reduce_channel_conv')  # num_batch, num_state, H, W
        x_reduce = fluid.layers.reshape(
            x_reduce, shape=[num_batch, num_state, H * W])  # num_batch, num_state, L
        x_reduce = fluid.layers.transpose(
            x_reduce, perm=[0, 2, 1])  # num_batch, L, num_state

    # Project pixels to graph vertices: V has shape num_batch, num_state, num_node
    V = fluid.layers.transpose(fluid.layers.matmul(B, x_reduce), perm=[0, 2, 1])
    # Optional normalization by L = H*W (disabled):
    # L = fluid.layers.fill_constant(shape=[1], value=H*W, dtype='float32')
    # V = fluid.layers.elementwise_div(V, L)

    # Graph reasoning over the vertex features
    new_V = gcn_module('gru_gcn', V, num_node, num_state)

    # Reverse projection back to the coordinate space
    B = fluid.layers.reshape(B, shape=[num_batch, num_node, H * W])
    D = fluid.layers.transpose(B, perm=[0, 2, 1])
    Y = fluid.layers.matmul(D, fluid.layers.transpose(new_V, perm=[0, 2, 1]))
    Y = fluid.layers.transpose(Y, perm=[0, 2, 1])
    Y = fluid.layers.reshape(Y, shape=[num_batch, num_state, H, W])

    # Extend back to C channels and add the residual connection
    with scope('extend_dim'):
        Y = conv(Y, C, filter_size=1, bias_attr=False, name='extend_dim_conv')
        # Y = bn_zero(Y)
        Y = bn(Y)
    out = fluid.layers.elementwise_add(Y, x)
    return out
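# --- Shape walk-through (illustrative only) ---------------------------------
# A minimal NumPy sketch of the projection --> reasoning --> reverse
# projection math above, to make the shape bookkeeping concrete. The random
# matrices stand in for the learned 1x1 convs, and the identity map stands in
# for gcn_module; this is not the trained operator.
import numpy as np

def gru_module_shapes(x, num_state=128, num_node=64):
    b, c, h, w = x.shape
    l = h * w
    w_proj = np.random.randn(num_node, c) * 0.01     # stand-in for projection_conv
    w_reduce = np.random.randn(num_state, c) * 0.01  # stand-in for reduce_channel_conv
    x_flat = x.reshape(b, c, l)                                # B, C, L
    B_mat = np.einsum('nc,bcl->bnl', w_proj, x_flat)           # B, N, L
    x_red = np.einsum('sc,bcl->bsl', w_reduce, x_flat)         # B, C1, L
    V = np.matmul(B_mat, x_red.transpose(0, 2, 1))             # B, N, C1
    V = V.transpose(0, 2, 1)                                   # B, C1, N
    new_V = V                                                  # placeholder for gcn_module
    Y = np.matmul(new_V, B_mat)                                # B, C1, L
    return Y.reshape(b, num_state, h, w)                       # B, C1, H, W

assert gru_module_shapes(np.zeros((2, 512, 16, 16))).shape == (2, 128, 16, 16)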
def entry_flow(self, data):
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=None,
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09))
    with scope("entry_flow"):
        with scope("conv1"):
            data = bn_relu(
                conv(data, 32, 3, stride=2, padding=1, param_attr=param_attr))
        with scope("conv2"):
            data = bn_relu(
                conv(data, 64, 3, stride=1, padding=1, param_attr=param_attr))

    # Get the entry-flow params
    block_num = self.bottleneck_params["entry_flow"][0]
    strides = self.bottleneck_params["entry_flow"][1]
    chns = self.bottleneck_params["entry_flow"][2]
    strides = check_data(strides, block_num)
    chns = check_data(chns, block_num)

    # Params that control the flow
    s = self.stride
    block_point = self.block_point
    output_stride = self.output_stride

    with scope("entry_flow"):
        for i in range(block_num):
            block_point = block_point + 1
            with scope("block" + str(i + 1)):
                stride = strides[i] if check_stride(s * strides[i],
                                                    output_stride) else 1
                data, short_cuts = self.xception_block(data, chns[i],
                                                       [1, 1, stride])
                s = s * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]

    self.stride = s
    self.block_point = block_point
    return data
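# --- Helper sketches (illustrative only) ------------------------------------
# entry_flow relies on three small helpers not shown in this excerpt. Minimal
# versions consistent with how they are called might look like this; the
# repo's actual implementations may differ.
def check_data(data, number):
    # Broadcast a scalar to a per-block list, or validate a provided list.
    if isinstance(data, int):
        return [data] * number
    assert len(data) == number
    return data

def check_stride(s, os):
    # Keep striding only while the accumulated stride stays within the
    # requested output_stride.
    return s <= os

def check_points(count, points):
    # Is this block index one of the requested short-cut (decode) points?
    if points is None:
        return False
    return count in points if isinstance(points, list) else count == points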
def glore(input, num_classes):
    """
    Reference:
        Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks."
        In CVPR 2019.
    """
    # Backbone: ResNet
    res5, feat_dict = resnet(input)
    res4 = feat_dict[91]

    # 3x3 Conv: 2048 -> 512 (1x1 for Cityscapes)
    reduce_kernel = 3
    if cfg.DATASET.DATASET_NAME == 'cityscapes':
        reduce_kernel = 1
    with scope('feature'):
        feature = conv(res5, 512, filter_size=reduce_kernel, bias_attr=False,
                       name='feature_conv')
        feature = bn(feature, act='relu')

    # Global Reasoning Unit (GRU) module
    gru_output = gru_module(feature, num_state=128, num_node=64)
    dropout = fluid.layers.dropout(gru_output, dropout_prob=0.1,
                                   name="dropout")

    logit = get_logit_interp(dropout, num_classes, input.shape[2:])
    if cfg.MODEL.GLORE.AuxHead:
        aux_logit = FCNHead(res4, 256, num_classes, input.shape[2:])
        return logit, aux_logit
    return logit
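# --- Usage sketch (illustrative only) ----------------------------------------
# One way glore() could be wired into a Paddle 1.x static graph, assuming this
# file's imports (fluid, cfg, resnet, FCNHead, ...) are in scope and the
# config has been loaded. Input layout is NCHW; num_classes=19 matches
# Cityscapes.
import paddle.fluid as fluid

main_prog, start_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    image = fluid.layers.data(
        name='image', shape=[3, 512, 1024], dtype='float32')
    # Returns (logit, aux_logit) when cfg.MODEL.GLORE.AuxHead is enabled.
    outputs = glore(image, num_classes=19)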
def xception_block(self,
                   input,
                   channels,
                   strides=1,
                   filters=3,
                   dilation=1,
                   skip_conv=True,
                   has_skip=True,
                   activation_fn_in_separable_conv=False):
    repeat_number = 3
    channels = check_data(channels, repeat_number)
    filters = check_data(filters, repeat_number)
    strides = check_data(strides, repeat_number)
    data = input
    results = []
    for i in range(repeat_number):
        with scope('separable_conv' + str(i + 1)):
            if not activation_fn_in_separable_conv:
                data = relu(data)
                data = separate_conv(data, channels[i], strides[i],
                                     filters[i], dilation=dilation)
            else:
                data = separate_conv(data, channels[i], strides[i],
                                     filters[i], dilation=dilation, act=relu)
            results.append(data)
    if not has_skip:
        return data, results
    if skip_conv:
        param_attr = fluid.ParamAttr(
            name=name_scope + 'weights',
            regularizer=None,
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09))
        with scope('shortcut'):
            skip = bn(
                conv(input, channels[-1], 1, strides[-1], groups=1, padding=0,
                     param_attr=param_attr))
    else:
        skip = input
    return data + skip, results
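# --- separate_conv sketch (illustrative only) --------------------------------
# separate_conv is defined elsewhere in the repo; a minimal depthwise-
# separable version consistent with its call sites (input, channel, stride,
# filter, dilation=..., act=...) might look like this. The real helper also
# wraps each conv in BN and name scopes.
import paddle.fluid as fluid

def separate_conv_sketch(input, channel, stride, filter, dilation=1, act=None):
    # Depthwise: one filter per input channel, spatial mixing only.
    depthwise = fluid.layers.conv2d(
        input, num_filters=input.shape[1], filter_size=filter, stride=stride,
        padding=(filter // 2) * dilation, dilation=dilation,
        groups=input.shape[1], bias_attr=False)
    if act is not None:
        depthwise = act(depthwise)
    # Pointwise: 1x1 conv to mix channels.
    pointwise = fluid.layers.conv2d(depthwise, channel, 1, bias_attr=False)
    return act(pointwise) if act is not None else pointwise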
def get_logit_interp(input, num_classes, out_shape, name="logit"):
    # 1x1 Conv
    param_attr = fluid.ParamAttr(
        name=name + 'weights',
        regularizer=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.0),
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
    with scope(name):
        logit = conv(input, num_classes, filter_size=1,
                     param_attr=param_attr, bias_attr=True,
                     name=name + '_conv')
        logit_interp = fluid.layers.resize_bilinear(
            logit, out_shape=out_shape, name=name + '_interp')
    return logit_interp
def PSPHead(input, out_features, num_classes, output_shape):
    # Arch of Pyramid Scene Parsing Module:
    #
    #         |----> Pool_1x1 + Conv_1x1 + BN + ReLU + bilinear_interp ---->|--------|
    #         |                                                             |        |
    #         |----> Pool_2x2 + Conv_1x1 + BN + ReLU + bilinear_interp ---->|        |
    # x ----->|                                                             | concat |----> Conv_3x3 + BN + ReLU --> Dropout --> Conv_1x1
    #    |    |----> Pool_3x3 + Conv_1x1 + BN + ReLU + bilinear_interp ---->|        |
    #    |    |                                                             |        |
    #    |    |----> Pool_6x6 + Conv_1x1 + BN + ReLU + bilinear_interp ---->|--------|
    #    |                                                                      ^
    #    |----------------------------------------------------------------------|
    #
    cat_layers = []
    sizes = (1, 2, 3, 6)
    # Four parallel pooling branches
    for size in sizes:
        psp_name = "psp" + str(size)
        with scope(psp_name):
            pool_feat = fluid.layers.adaptive_pool2d(
                input, pool_size=[size, size], pool_type='avg',
                name=psp_name + '_adapool')
            conv_feat = conv(pool_feat, out_features, filter_size=1,
                             bias_attr=True, name=psp_name + '_conv')
            bn_feat = bn(conv_feat, act='relu')
            interp = fluid.layers.resize_bilinear(
                bn_feat, out_shape=input.shape[2:], name=psp_name + '_interp')
        cat_layers.append(interp)
    cat_layers = [input] + cat_layers[::-1]
    cat = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')

    # Conv_3x3 + BN + ReLU
    psp_end_name = "psp_end"
    with scope(psp_end_name):
        data = conv(cat, out_features, filter_size=3, padding=1,
                    bias_attr=True, name=psp_end_name)
        out = bn(data, act='relu')

    # Dropout
    dropout_out = fluid.layers.dropout(out, dropout_prob=0.1, name="dropout")

    # Conv_1x1 + bilinear upsample
    seg_name = "logit"
    with scope(seg_name):
        param_attr = fluid.ParamAttr(
            name=seg_name + '_weights',
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0),
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
        logit = conv(dropout_out, num_classes, filter_size=1,
                     param_attr=param_attr, bias_attr=True,
                     name=seg_name + '_conv')
        logit_interp = fluid.layers.resize_bilinear(
            logit, out_shape=output_shape, name=seg_name + '_interp')
    return logit_interp
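# --- Usage sketch (illustrative only) ----------------------------------------
# Attaching PSPHead to 2048-channel backbone features at output stride 8 for a
# 512x512 crop, assuming this file's helpers (conv, bn, scope) are in scope.
# Feature and class counts are placeholders.
import paddle.fluid as fluid

main_prog, start_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    res5 = fluid.layers.data(
        name='res5', shape=[2048, 64, 64], dtype='float32')
    seg_logit = PSPHead(res5, out_features=512, num_classes=19,
                        output_shape=[512, 512])  # -1 x 19 x 512 x 512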
def ASPPHead(input, mid_channel, num_classes, output_shape):
    # Arch of Atrous Spatial Pyramid Pooling Module:
    #
    #         |----> ImagePool + Conv_1x1 + BN + ReLU + bilinear_interp --->|--------|
    #         |                                                             |        |
    #         |----> Conv_1x1 + BN + ReLU -------------------------------->|        |
    #         |                                                             |        |
    # x ----->|----> AtrousConv_3x3 + BN + ReLU -------------------------->| concat |----> Conv_1x1 + BN + ReLU --> Dropout --> Conv_1x1
    #         |                                                             |        |
    #         |----> AtrousConv_3x3 + BN + ReLU -------------------------->|        |
    #         |                                                             |        |
    #         |----> AtrousConv_3x3 + BN + ReLU -------------------------->|--------|
    #
    if cfg.MODEL.BACKBONE_OUTPUT_STRIDE == 16:
        aspp_ratios = [6, 12, 18]
    elif cfg.MODEL.BACKBONE_OUTPUT_STRIDE == 8:
        aspp_ratios = [12, 24, 36]
    else:
        raise Exception("deeplab only supports output stride 8 or 16")
    param_attr = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=None,
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
    with scope('ASPPHead'):
        with scope("image_pool"):
            image_avg = fluid.layers.reduce_mean(input, [2, 3], keep_dim=True)
            image_avg = bn_relu(
                conv(image_avg, mid_channel, 1, 1, groups=1, padding=0,
                     param_attr=param_attr))
            image_avg = fluid.layers.resize_bilinear(image_avg,
                                                     input.shape[2:])
        with scope("aspp0"):
            aspp0 = bn_relu(
                conv(input, mid_channel, 1, 1, groups=1, padding=0,
                     param_attr=param_attr))
        with scope("aspp1"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp1 = separate_conv(input, mid_channel, 1, 3,
                                      dilation=aspp_ratios[0], act=relu)
            else:
                aspp1 = bn_relu(
                    conv(input, mid_channel, stride=1, filter_size=3,
                         dilation=aspp_ratios[0], padding=aspp_ratios[0],
                         param_attr=param_attr))
        with scope("aspp2"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp2 = separate_conv(input, mid_channel, 1, 3,
                                      dilation=aspp_ratios[1], act=relu)
            else:
                aspp2 = bn_relu(
                    conv(input, mid_channel, stride=1, filter_size=3,
                         dilation=aspp_ratios[1], padding=aspp_ratios[1],
                         param_attr=param_attr))
        with scope("aspp3"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp3 = separate_conv(input, mid_channel, 1, 3,
                                      dilation=aspp_ratios[2], act=relu)
            else:
                aspp3 = bn_relu(
                    conv(input, mid_channel, stride=1, filter_size=3,
                         dilation=aspp_ratios[2], padding=aspp_ratios[2],
                         param_attr=param_attr))
        with scope("concat"):
            feat = fluid.layers.concat(
                [image_avg, aspp0, aspp1, aspp2, aspp3], axis=1)
            feat = bn_relu(
                conv(feat, 2 * mid_channel, 1, 1, groups=1, padding=0,
                     param_attr=param_attr))
            feat = fluid.layers.dropout(feat, 0.1)

    # Conv_1x1 + bilinear upsample
    seg_name = "logit"
    with scope(seg_name):
        param_attr = fluid.ParamAttr(
            name=seg_name + '_weights',
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0),
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
        logit = conv(feat, num_classes, filter_size=1, param_attr=param_attr,
                     bias_attr=True, name=seg_name + '_conv')
        logit_interp = fluid.layers.resize_bilinear(
            logit, out_shape=output_shape, name=seg_name + '_interp')
    return logit_interp
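# --- Padding/dilation check (illustrative only) -------------------------------
# For a 3x3 atrous conv the effective kernel extent is dilation*(k-1)+1, so
# setting padding == dilation (as in the aspp branches above) preserves the
# spatial size. Quick arithmetic check with the standard conv output formula:
def conv_out_size(size, k=3, stride=1, pad=0, dilation=1):
    return (size + 2 * pad - dilation * (k - 1) - 1) // stride + 1

for rate in (6, 12, 18, 24, 36):
    assert conv_out_size(64, k=3, pad=rate, dilation=rate) == 64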