def __init__(self, inplanes, outplanes, innerplanes, stride=1, dilation=1,
             group=1, downsample=None):
    super().__init__()
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)
    self.stride = stride

    self.conv1 = nn.Conv2d(
        inplanes, innerplanes, kernel_size=1, stride=str1x1, bias=False)
    self.gn1 = nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.conv2 = nn.Conv2d(
        innerplanes, innerplanes, kernel_size=3, stride=str3x3, bias=False,
        padding=1 * dilation, dilation=dilation, groups=group)
    self.gn2 = nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.conv3 = nn.Conv2d(
        innerplanes, outplanes, kernel_size=1, stride=1, bias=False)
    self.gn3 = nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.downsample = downsample
    self.relu = nn.ReLU(inplace=True)
def __init__(self, inplanes, outplanes, innerplanes, stride=1, dilation=1,
             group=1, downsample=None, attention=False):
    super().__init__()
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)
    self.stride = stride

    self.conv1 = nn.Conv2d(
        inplanes, innerplanes, kernel_size=1, stride=str1x1, bias=False)
    self.gn1 = nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.conv2 = nn.Conv2d(
        innerplanes, innerplanes, kernel_size=3, stride=str3x3, bias=False,
        padding=1 * dilation, dilation=dilation, groups=group)
    self.gn2 = nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.conv3 = nn.Conv2d(
        innerplanes, outplanes, kernel_size=1, stride=1, bias=False)
    self.gn3 = nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                            eps=cfg.GROUP_NORM.EPSILON)

    self.downsample = downsample
    self.relu = nn.ReLU(inplace=True)
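# A minimal forward() sketch to accompany the bottleneck __init__ blocks
# above, assuming the usual Detectron-style residual pattern (GN after each
# conv, ReLU after the first two, shortcut added before the final ReLU).
# Illustrative only; the repo's own forward may differ in details.
def forward(self, x):
    residual = x

    out = self.relu(self.gn1(self.conv1(x)))
    out = self.relu(self.gn2(self.conv2(out)))
    out = self.gn3(self.conv3(out))

    if self.downsample is not None:
        # project the identity branch when stride/channels change
        residual = self.downsample(x)

    out += residual
    return self.relu(out)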
def __init__(self, num_backbone_stages):
    super().__init__()
    fpn_dim = cfg.FPN.DIM
    self.num_backbone_stages = num_backbone_stages

    self.prd_conv_lateral = nn.ModuleList()
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.prd_conv_lateral.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 1, 1, 0, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)))
        else:
            self.prd_conv_lateral.append(nn.Conv2d(fpn_dim, fpn_dim, 1, 1, 0))

    self.posthoc_modules = nn.ModuleList()
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)))
        else:
            self.posthoc_modules.append(nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))

    self._init_weights()
def __init__(self, dim_in, roi_xform_func, spatial_scale, num_convs):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.num_convs = num_convs

    dilation = cfg.BSHAPE.DILATION
    dim_inner = cfg.BSHAPE.DIM_REDUCED
    self.dim_out = dim_inner

    module_list = []
    for i in range(num_convs - 1):
        module_list.extend([
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ])
        dim_in = dim_inner
    self.conv_fcn = nn.Sequential(*module_list)

    self.bshape_conv1 = nn.ModuleList()
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    for i in range(num_levels):
        self.bshape_conv1.append(nn.Sequential(
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ))

    # upsample layer
    self.upconv = nn.ConvTranspose2d(dim_inner, dim_inner, 2, 2, 0)

    self.apply(self._init_weights)
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale

    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    module_list = []
    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS - 1):
        module_list.extend([
            nn.Conv2d(dim_in, hidden_dim, 3, 1, 1, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ])
        dim_in = hidden_dim
    self.convs = nn.Sequential(*module_list)

    self.dim_out = fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.fc = nn.Linear(dim_in * roi_size * roi_size, fc_dim)

    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    self.conv1_head = nn.ModuleList()
    for i in range(num_levels):
        self.conv1_head.append(nn.Sequential(
            nn.Conv2d(dim_in, hidden_dim, 3, 1, 1, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ))

    self._init_weights()
def __init__(self, dim_in, roi_xform_func, spatial_scale, num_convs):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.num_convs = num_convs

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    self.dim_out = dim_inner

    module_list = []
    for i in range(num_convs - 1):
        module_list.extend([
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ])
        dim_in = dim_inner
    self.conv_fcn = nn.Sequential(*module_list)

    self.mask_conv1 = nn.ModuleList()
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    for i in range(num_levels):
        self.mask_conv1.append(nn.Sequential(
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ))

    # upsample layer
    self.upconv = nn.ConvTranspose2d(dim_inner, dim_inner, 2, 2, 0)

    self.apply(self._init_weights)
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale

    self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

    self.fc1 = nn.ModuleList()
    for i in range(num_levels):
        self.fc1.append(nn.Sequential(
            nn.Linear(dim_in * roi_size**2, hidden_dim),
            nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)))

    self.fc2 = nn.Sequential(
        nn.Linear(hidden_dim, hidden_dim),
        nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True))

    self._init_weights()
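# Note on the heads above: nn.GroupNorm accepts any (N, C, *) input, so it
# can normalize the (N, C) output of a Linear layer directly, which is what
# the fc1/fc2 stacks rely on. A standalone shape check (the 256/7/1024/32
# numbers are illustrative stand-ins for the config values):
import torch
import torch.nn as nn

fc_gn = nn.Sequential(
    nn.Linear(256 * 7 * 7, 1024),  # dim_in * roi_size**2 -> MLP_HEAD_DIM
    nn.GroupNorm(32, 1024, eps=1e-5),
    nn.ReLU(inplace=True))

x = torch.randn(8, 256 * 7 * 7)    # a batch of 8 RoI feature vectors
print(fc_gn(x).shape)              # torch.Size([8, 1024])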
def __init__(self, dim_in_lateral):
    super().__init__()
    self.dim_in_lateral = dim_in_lateral

    if cfg.FPN.USE_GN:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(self.dim_in_lateral, self.dim_in_lateral, 3,
                      stride=2, padding=1),
            nn.GroupNorm(net_utils.get_group_gn(self.dim_in_lateral),
                         self.dim_in_lateral, eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_lateral = nn.Conv2d(
            self.dim_in_lateral, self.dim_in_lateral, 3, stride=2, padding=1)

    if cfg.FPN.USE_GN:
        self.posthoc = nn.Sequential(
            nn.Conv2d(dim_in_lateral, dim_in_lateral, 3, 1, 1, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_in_lateral),
                         dim_in_lateral, eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.posthoc = nn.Conv2d(dim_in_lateral, dim_in_lateral, 3, 1, 1)

    self._init_weights()
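# The modules above all end with self._init_weights() or
# self.apply(self._init_weights). A minimal sketch of such an initializer,
# assuming Detectron-style defaults (He/MSRA fill for convs, zero bias,
# unit-scale GN); the repo's actual version may use XavierFill/MSRAFill
# wrappers, and the self.apply(...) variant takes the module as its argument.
def _init_weights(self):
    def init_func(m):
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.GroupNorm):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
    self.apply(init_func)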
def bottleneck_gn_transformation(
    model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
    dilation=1, group=1
):
    """Add a bottleneck transformation with GroupNorm to the model."""
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> GN -> ReLU
    cur = model.ConvGN(
        blob_in, prefix + '_branch2a', dim_in, dim_inner,
        kernel=1, group_gn=get_group_gn(dim_inner), stride=str1x1, pad=0)
    cur = model.Relu(cur, cur)

    # conv 3x3 -> GN -> ReLU
    cur = model.ConvGN(
        cur, prefix + '_branch2b', dim_inner, dim_inner,
        kernel=3, group_gn=get_group_gn(dim_inner), stride=str3x3,
        pad=1 * dilation, dilation=dilation, group=group)
    cur = model.Relu(cur, cur)

    # conv 1x1 -> GN (no ReLU)
    cur = model.ConvGN(
        cur, prefix + '_branch2c', dim_inner, dim_out,
        kernel=1, group_gn=get_group_gn(dim_out), stride=1, pad=0)
    return cur
def __init__(self, dim_in, roi_xform_func, spatial_scale, num_convs):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.num_convs = num_convs

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    self.dim_out = dim_inner

    module_list = []
    for i in range(2):
        module_list.extend([
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ])
        dim_in = dim_inner
    self.conv_fcn = nn.Sequential(*module_list)

    self.mask_conv1 = nn.ModuleList()
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    for i in range(num_levels):
        self.mask_conv1.append(nn.Sequential(
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ))

    self.mask_conv4 = nn.Sequential(
        nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                  dilation=dilation, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True))

    self.mask_conv4_fc = nn.Sequential(
        nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                  dilation=dilation, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True))

    self.mask_conv5_fc = nn.Sequential(
        nn.Conv2d(dim_in, int(dim_inner / 2), 3, 1, padding=1 * dilation,
                  dilation=dilation, bias=False),
        # group count computed from the actual channel count, dim_inner / 2
        nn.GroupNorm(net_utils.get_group_gn(int(dim_inner / 2)),
                     int(dim_inner / 2), eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True))

    self.mask_fc = nn.Sequential(
        nn.Linear(int(dim_inner / 2) * cfg.MRCNN.ROI_XFORM_RESOLUTION ** 2,
                  cfg.MRCNN.RESOLUTION ** 2, bias=True),
        nn.ReLU(inplace=True))

    # upsample layer
    self.upconv = nn.ConvTranspose2d(dim_inner, dim_inner, 2, 2, 0)

    self.apply(self._init_weights)
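# A forward sketch for the fused FCN+FC mask head above (PANet-style): the
# conv branch is upsampled by the transposed conv, the fc branch is reshaped
# into a single-channel full-resolution mask, and the two are summed via
# broadcasting. RoI transform arguments and the per-level mask_conv1 routing
# are elided; F is torch.nn.functional. Illustrative, not the repo's code.
def forward(self, x):
    x = self.conv_fcn(x)
    n = x.size(0)
    # FCN branch: conv -> 2x transposed-conv upsample
    x_fcn = F.relu(self.upconv(self.mask_conv4(x)), inplace=True)
    # FC branch: two convs -> flatten -> fc -> reshape to a 1-channel mask
    x_ff = self.mask_fc(self.mask_conv5_fc(self.mask_conv4_fc(x)).view(n, -1))
    x_ff = x_ff.view(n, 1, cfg.MRCNN.RESOLUTION, cfg.MRCNN.RESOLUTION)
    return x_fcn + x_ff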
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                         else ('XavierFill', {})),
            bias_init=const_fill(0.0))
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                         else ('XavierFill', {})),
            bias_init=const_fill(0.0))
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
def basic_gn_stem():
    return nn.Sequential(OrderedDict([
        ('conv1', nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False)),
        ('gn1', nn.GroupNorm(net_utils.get_group_gn(64), 64,
                             eps=cfg.GROUP_NORM.EPSILON)),
        ('relu', nn.ReLU(inplace=True)),
        ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))]))
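# net_utils.get_group_gn(dim) maps a channel count to a GroupNorm group
# count. A sketch following the Detectron convention, assuming the
# cfg.GROUP_NORM.DIM_PER_GP / cfg.GROUP_NORM.NUM_GROUPS keys, of which
# exactly one is active (> 0):
def get_group_gn(dim):
    """Number of GN groups for a layer with `dim` channels."""
    dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP
    num_groups = cfg.GROUP_NORM.NUM_GROUPS
    assert dim_per_gp == -1 or num_groups == -1, \
        'GroupNorm: can only specify G or C/G'
    if dim_per_gp > 0:
        assert dim % dim_per_gp == 0
        return dim // dim_per_gp
    assert dim % num_groups == 0
    return num_groups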
def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, with GroupNorm"""
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    roi_feat = model.RoIFeatureTransform(
        blob_in, 'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    current = roi_feat
    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
        current = model.ConvGN(
            current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3,
            group_gn=get_group_gn(hidden_dim),
            stride=1, pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.}))
        current = model.Relu(current, current)
        dim_in = hidden_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim
def __init__(self, dim_in, roi_xform_func, spatial_scale, num_convs):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.num_convs = num_convs

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    self.dim_out = dim_inner

    module_list = []
    for i in range(num_convs):
        module_list.extend([
            nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                      dilation=dilation, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                         eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ])
        dim_in = dim_inner
    self.conv_fcn = nn.Sequential(*module_list)

    # upsample layer
    self.upconv = nn.ConvTranspose2d(dim_inner, dim_inner, 2, 2, 0)

    if cfg.MRCNN.USE_ATTENTION:
        self.attention = SimpleAttention(dim_in)

    self.apply(self._init_weights)
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale

    self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

    self.fc1 = nn.ModuleList()
    for i in range(num_levels):
        self.fc1.append(nn.Sequential(
            nn.Linear(dim_in * roi_size**2, hidden_dim),
            # GroupNorm disabled for fc1 in this variant:
            # nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
            #              eps=cfg.GROUP_NORM.EPSILON),
            nn.ReLU(inplace=True)
        ))

    self.fc2 = nn.Sequential(
        nn.Linear(hidden_dim, hidden_dim),
        nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True))

    self._init_weights()
def __init__(self, dim_in_top, dim_in_lateral):
    super().__init__()
    self.dim_in_top = dim_in_top
    self.dim_in_lateral = dim_in_lateral
    self.dim_out = dim_in_top

    if cfg.FPN.USE_GN:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(dim_in_lateral, self.dim_out, 3, 1, 1, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(self.dim_out), self.dim_out,
                         eps=cfg.GROUP_NORM.EPSILON),
            # the second conv consumes the first conv's output,
            # so its in_channels must be self.dim_out, not dim_in_lateral
            nn.Conv2d(self.dim_out, self.dim_out, 3, 1, 1, bias=False),
            nn.ReLU(inplace=True))
    else:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(dim_in_lateral, self.dim_out, 3, 1, 1, bias=False),
            nn.Conv2d(self.dim_out, self.dim_out, 3, 1, 1, bias=False),
            nn.ReLU(inplace=True))

    self._init_weights()

    self.st = SelfTrans(n_head=1, n_mix=4, d_model=cfg.FPN.DIM,
                        d_k=cfg.FPN.DIM, d_v=cfg.FPN.DIM)
    self.gt = GroundTrans(in_channels=cfg.FPN.DIM, inter_channels=None,
                          mode='dot', dimension=2, bn_layer=True)
def basic_gn_stem():
    stride_3d = (1, 2, 2) if cfg.LESION.NO_DEPTH_PAD else 2
    return nn.Sequential(OrderedDict([
        ('conv1', nn.Conv3d(1, 64, 7, stride=stride_3d, padding=3, bias=False)),
        ('gn1', nn.GroupNorm(net_utils.get_group_gn(64), 64,
                             eps=cfg.GROUP_NORM.EPSILON)),
        ('relu', nn.ReLU(inplace=True)),
        ('maxpool', nn.MaxPool3d(kernel_size=3, stride=stride_3d, padding=1))]))
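# Standalone shape check for the 3D stem above in its depth-preserving
# configuration (the cfg.LESION.NO_DEPTH_PAD case); all numbers here are
# illustrative stand-ins, not config values:
import torch
import torch.nn as nn

stem = nn.Sequential(
    nn.Conv3d(1, 64, 7, stride=(1, 2, 2), padding=3, bias=False),
    nn.GroupNorm(32, 64, eps=1e-5),
    nn.ReLU(inplace=True),
    nn.MaxPool3d(kernel_size=3, stride=(1, 2, 2), padding=1))

x = torch.randn(1, 1, 9, 224, 224)  # (N, C, D, H, W): 9 CT slices
print(stem(x).shape)                # torch.Size([1, 64, 9, 56, 56])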
def basic_gn_shortcut(inplanes, outplanes, stride):
    return nn.Sequential(
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=stride, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                     eps=cfg.GROUP_NORM.EPSILON))
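# How basic_gn_shortcut is typically consumed: when a stage changes spatial
# stride or channel width, the 1x1 conv + GN projection becomes the
# bottleneck's `downsample` branch. Illustrative pairing; the block class
# name `Bottleneck` is assumed here, not taken from the repo:
downsample = basic_gn_shortcut(inplanes=256, outplanes=512, stride=2)
block = Bottleneck(inplanes=256, outplanes=512, innerplanes=128,
                   stride=2, downsample=downsample)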
def basic_gn_stem(model, data, **kwargs):
    """Add a basic ResNet stem (using GN)"""
    dim = 64
    p = model.ConvGN(
        data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2)
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    return p, dim
def basic_gn_stem():
    # if cfg.LESION.LESION_ENABLED:
    #     input_dim = cfg.LESION.SLICE_NUM
    # else:
    input_dim = 3
    return nn.Sequential(OrderedDict([
        ('conv1', nn.Conv2d(input_dim, 64, 7, stride=2, padding=3, bias=False)),
        ('gn1', nn.GroupNorm(net_utils.get_group_gn(64), 64,
                             eps=cfg.GROUP_NORM.EPSILON)),
        ('relu', nn.ReLU(inplace=True)),
        ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))]))
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale

    self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.fc1 = nn.Sequential(
        nn.Linear(dim_in * roi_size**2, hidden_dim),
        nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON))
    self.fc2 = nn.Sequential(
        nn.Linear(hidden_dim, hidden_dim),
        nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON))

    self._init_weights()
def basic_gn_shortcut(inplanes, outplanes, stride):
    stride_3d = (1, stride, stride) if cfg.LESION.NO_DEPTH_PAD else stride
    return nn.Sequential(
        nn.Conv3d(inplanes, outplanes, kernel_size=1, stride=stride_3d,
                  bias=False),
        nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                     eps=cfg.GROUP_NORM.EPSILON))
def __init__(self, dim_in_top, dim_in_lateral):
    super().__init__()
    self.dim_in_top = dim_in_top
    self.dim_in_lateral = dim_in_lateral
    self.dim_out = dim_in_top
    if cfg.FPN.USE_GN:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(self.dim_out), self.dim_out,
                         eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_lateral = nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0)

    self._init_weights()
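# A forward() sketch for the top-down lateral module above, matching the
# usual Detectron.pytorch data flow: 1x1 lateral conv on the bottom-up
# feature, nearest-neighbor 2x upsampling of the top-down feature, then an
# elementwise sum. F is torch.nn.functional; illustrative only.
def forward(self, top_blob, lateral_blob):
    lat = self.conv_lateral(lateral_blob)
    td = F.interpolate(top_blob, scale_factor=2, mode='nearest')
    return lat + td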
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(
            current, '_mask_fcn' + str(i + 1), dim_in, dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.}))
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner,
        kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    if dim_in == dim_out:
        return blob_in

    # output name is prefix + '_branch1_gn'
    return model.ConvGN(
        blob_in,
        prefix + '_branch1',
        dim_in,
        dim_out,
        kernel=1,
        group_gn=get_group_gn(dim_out),
        stride=stride,
        pad=0,
        group=1)
def __init__(self, dim_in_top, dim_in_lateral):
    super().__init__()
    self.dim_in_top = dim_in_top
    self.dim_in_lateral = dim_in_lateral
    self.dim_out = dim_in_top
    if cfg.FPN.USE_GN:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(self.dim_out), self.dim_out,
                         eps=cfg.GROUP_NORM.EPSILON))
    elif cfg.FPN.USE_SN:
        self.conv_lateral = nn.Sequential(
            nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0, bias=False),
            mynn.SwitchNorm(self.dim_out,
                            using_moving_average=(not cfg.TEST.USE_BATCH_AVG),
                            using_bn=cfg.FPN.SN.USE_BN))
    else:
        self.conv_lateral = nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0)

    self._init_weights()
def __init__(self, conv_body_func, fpn_level_info, P2only=False):
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    self.num_backbone_stages = \
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)

    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []  # a list of scales for FPN outputs

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        self.conv_top = nn.Sequential(
            nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                         eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)

    self.topdown_lateral_modules = nn.ModuleList()
    self.posthoc_modules = nn.ModuleList()

    # For other levels add top-down and lateral connections
    for i in range(self.num_backbone_stages - 1):
        self.topdown_lateral_modules.append(
            topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1]))

    # Post-hoc scale-specific 3x3 convs
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)))
        else:
            self.posthoc_modules.append(nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))
        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        # Use max pooling to simulate stride 2 subsampling
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            # append, not call: nn.ModuleList is not callable
            self.extra_pyramid_modules.append(
                nn.Conv2d(dim_in, fpn_dim, 3, 2, 1))
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        # use only the finest level
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()

    # Deliberately add conv_body after _init_weights.
    # conv_body has its own _init_weights function
    self.conv_body = conv_body_func()  # e.g. resnet
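# A forward() sketch for the FPN module above, following the Detectron.pytorch
# control flow: conv_top seeds the recursion on the coarsest backbone feature,
# the top-down/lateral modules walk down, the post-hoc 3x3 convs refine each
# level, and P6 is optionally prepended. The res1..resN stage naming on
# conv_body is assumed from the ResNet body; illustrative only.
def forward(self, x):
    conv_body_blobs = [self.conv_body.res1(x)]
    for i in range(1, self.conv_body.convX):
        conv_body_blobs.append(
            getattr(self.conv_body, 'res%d' % (i + 1))(conv_body_blobs[-1]))

    fpn_inner_blobs = [self.conv_top(conv_body_blobs[-1])]
    for i in range(self.num_backbone_stages - 1):
        fpn_inner_blobs.append(self.topdown_lateral_modules[i](
            fpn_inner_blobs[-1], conv_body_blobs[-(i + 2)]))

    fpn_output_blobs = [self.posthoc_modules[i](fpn_inner_blobs[i])
                        for i in range(self.num_backbone_stages)]

    if hasattr(self, 'maxpool_p6'):
        fpn_output_blobs.insert(0, self.maxpool_p6(fpn_output_blobs[0]))

    if self.P2only:
        return fpn_output_blobs[-1]  # finest level only
    return fpn_output_blobs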
def __init__(self, dim_in, hidden_dim=256, num_convs=4):
    super().__init__()
    self.dim_in = dim_in
    self.num_convs = num_convs      # 4 in fast rcnn heads
    self.hidden_dim = hidden_dim    # cfg.FAST_RCNN.CONV_HEAD_DIM = 256
    self.position_cls = 3
    self.position_threshold = []

    module_list = []
    module_list.extend([
        nn.Conv2d(dim_in, dim_in, 3, 2, 1, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(dim_in), dim_in,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True)
    ])
    module_list.extend([
        nn.Conv2d(dim_in, self.hidden_dim, 3, 1, 1, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(self.hidden_dim), self.hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True)
    ])
    module_list.extend([
        nn.Conv2d(self.hidden_dim, self.hidden_dim, 3, 1, 1, bias=False),
        nn.GroupNorm(net_utils.get_group_gn(self.hidden_dim), self.hidden_dim,
                     eps=cfg.GROUP_NORM.EPSILON),
        nn.ReLU(inplace=True)
    ])
    self.convs = nn.Sequential(*module_list)

    self.dim_out = self.hidden_dim
    self.avgpool = nn.AdaptiveAvgPool2d(1)

    self._init_weights()
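# Sketch of how the position head above is presumably consumed: stacked
# convs, global average pooling, then flatten to (N, hidden_dim) for a
# downstream classifier. Assumed usage, not code from the repo.
def forward(self, x):
    x = self.convs(x)
    x = self.avgpool(x)           # (N, hidden_dim, 1, 1)
    return x.view(x.size(0), -1)  # (N, hidden_dim)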
def __init__(self, conv_body_func, fpn_level_info, P2only=False):
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5", "conv4", ...
    # "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4 backbone
    # stages to add FPN to.
    self.num_backbone_stages = \
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)

    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []  # a list of scales for FPN outputs

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For the coarsest backbone level: 1x1 conv only seeds recursion
    # self.conv_top = nn.Conv3d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)  # neglected by shuzhang
    if cfg.FPN.USE_GN:
        self.conv_top = nn.Sequential(
            nn.Conv3d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                         eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_top = nn.Conv3d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)

    self.topdown_lateral_modules = nn.ModuleList()
    self.posthoc_modules = nn.ModuleList()

    # For other levels add top-down and lateral connections
    for i in range(self.num_backbone_stages - 1):
        self.topdown_lateral_modules.append(
            topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1]))

    # Post-hoc scale-specific 3x3 convs
    scale_3d = 1  # for top-down filter output downscale problem in 3d input
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use all depth-wise
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv3d(fpn_dim, fpn_dim, (cfg.LESION.SLICE_NUM, 3, 3), 1,
                          (0, 1, 1), bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)))
        else:
            self.posthoc_modules.append(nn.Conv3d(fpn_dim, fpn_dim, 3, 1, 1))
        # scale_3d = scale_3d * 2
        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        # Use max pooling to simulate stride 2 subsampling
        self.maxpool_p6 = nn.MaxPool3d(kernel_size=1, stride=(1, 2, 2),
                                       padding=0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            # append, not call: nn.ModuleList is not callable
            self.extra_pyramid_modules.append(
                nn.Conv3d(dim_in, fpn_dim, 3, 2, 1))
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        # use only the finest level
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()

    # Deliberately add conv_body after _init_weights.
    # conv_body has its own _init_weights function
    self.conv_body = conv_body_func()  # e.g. resnet
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5", "conv4", ...
    # "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4 backbone
    # stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0))
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0))

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales
def __init__(self, conv_body_func, fpn_level_info, P2only=False,
             fpt_rendering=False):
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only
    self.fpt_rendering = fpt_rendering

    self.st = SelfTrans(n_head=1, n_mix=4, d_model=cfg.FPN.DIM,
                        d_k=cfg.FPN.DIM, d_v=cfg.FPN.DIM)
    self.rt = RenderTrans(channels_high=cfg.FPN.DIM, channels_low=cfg.FPN.DIM,
                          upsample=False)

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    self.num_backbone_stages = len(fpn_level_info.blobs) - (min_level - 2)
    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []

    if cfg.FPN.USE_GN:
        self.conv_top = nn.Sequential(
            nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                         eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)

    self.ground_lateral_modules = nn.ModuleList()
    self.posthoc_modules = nn.ModuleList()
    for i in range(self.num_backbone_stages - 1):
        self.ground_lateral_modules.append(
            ground_lateral_module(fpn_dim, fpn_dim_lateral[i + 1]))

    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON),
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.ReLU(inplace=True)))
        else:
            # ModuleList.append takes a single module, so the conv/ReLU
            # stack must be wrapped in nn.Sequential
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.ReLU(inplace=True)))
        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    if self.fpt_rendering:
        self.fpt_rendering_conv1_modules = nn.ModuleList()
        self.fpt_rendering_conv2_modules = nn.ModuleList()
        for i in range(self.num_backbone_stages - 1):
            if cfg.FPN.USE_GN:
                self.fpt_rendering_conv1_modules.append(nn.Sequential(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1, bias=True),
                    nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                 eps=cfg.GROUP_NORM.EPSILON),
                    nn.ReLU(inplace=True)))
                self.fpt_rendering_conv2_modules.append(nn.Sequential(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=True),
                    nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                 eps=cfg.GROUP_NORM.EPSILON),
                    nn.ReLU(inplace=True)))
            else:
                self.fpt_rendering_conv1_modules.append(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1))
                self.fpt_rendering_conv2_modules.append(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))

    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == 6:
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > 5:
        self.extra_pyramid_modules = nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(6, max_level + 1):
            # append, not call: nn.ModuleList is not callable
            self.extra_pyramid_modules.append(
                nn.Conv2d(dim_in, fpn_dim, 3, 2, 1))
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()
    self.conv_body = conv_body_func()  # e.g. resnet
def __init__(self, conv_body_func, fpn_level_info, P2only=False,
             panet_buttomup=False):
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only
    self.panet_buttomup = panet_buttomup

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    self.num_backbone_stages = \
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)

    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []  # a list of scales for FPN outputs

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        self.conv_top = nn.Sequential(
            nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
            nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                         eps=cfg.GROUP_NORM.EPSILON))
    else:
        self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)

    self.topdown_lateral_modules = nn.ModuleList()
    self.posthoc_modules = nn.ModuleList()

    # For other levels add top-down and lateral connections
    for i in range(self.num_backbone_stages - 1):
        self.topdown_lateral_modules.append(
            topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1]))

    # Post-hoc scale-specific 3x3 convs
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)))
        else:
            self.posthoc_modules.append(nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))
        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    # add for panet bottom-up path
    if self.panet_buttomup:
        self.panet_buttomup_conv1_modules = nn.ModuleList()
        self.panet_buttomup_conv2_modules = nn.ModuleList()
        for i in range(self.num_backbone_stages - 1):
            if cfg.FPN.USE_GN:
                self.panet_buttomup_conv1_modules.append(nn.Sequential(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1, bias=True),
                    nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                 eps=cfg.GROUP_NORM.EPSILON),
                    nn.ReLU(inplace=True)))
                self.panet_buttomup_conv2_modules.append(nn.Sequential(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=True),
                    nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                 eps=cfg.GROUP_NORM.EPSILON),
                    nn.ReLU(inplace=True)))
            else:
                self.panet_buttomup_conv1_modules.append(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1))
                self.panet_buttomup_conv2_modules.append(
                    nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1))
            # self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        # Use max pooling to simulate stride 2 subsampling
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            # append, not call: nn.ModuleList is not callable
            self.extra_pyramid_modules.append(
                nn.Conv2d(dim_in, fpn_dim, 3, 2, 1))
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        # use only the finest level
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()

    # Deliberately add conv_body after _init_weights.
    # conv_body has its own _init_weights function
    self.conv_body = conv_body_func()  # e.g. resnet