def __init__(self, dim_in, roi_xform_func, spatial_scale, num_convs):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.num_convs = num_convs

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    self.dim_out = dim_inner

    # Shared conv -> GroupNorm -> ReLU stack.
    module_list = []
    for i in range(num_convs - 1):
        module_list.extend([
            lib.nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                          dilation=dilation, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                             eps=cfg.GROUP_NORM.EPSILON),
            lib.nn.ReLU(inplace=True)
        ])
        dim_in = dim_inner
    self.conv_fcn = lib.nn.Sequential(*module_list)

    # One level-specific first conv block per FPN level used for RoI pooling.
    self.mask_conv1 = lib.nn.ModuleList()
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    for i in range(num_levels):
        self.mask_conv1.append(lib.nn.Sequential(
            lib.nn.Conv2d(dim_in, dim_inner, 3, 1, padding=1 * dilation,
                          dilation=dilation, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(dim_inner), dim_inner,
                             eps=cfg.GROUP_NORM.EPSILON),
            lib.nn.ReLU(inplace=True)
        ))

    # Upsample layer: 2x2 deconv with stride 2 doubles the mask resolution.
    self.upconv = lib.nn.ConvTranspose2d(dim_inner, dim_inner, 2, 2, 0)

    self.apply(self._init_weights)
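# --- Illustrative sketch, not part of the model above ---
# The 2x2 deconv with stride 2 (self.upconv) doubles the spatial size of
# the RoI feature map, e.g. 14x14 -> 28x28 before mask prediction. The
# dims below (256 channels, 14x14 input) are assumed for illustration only.
import torch
import torch.nn as nn

upconv = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
x = torch.randn(1, 256, 14, 14)
print(upconv(x).shape)  # torch.Size([1, 256, 28, 28])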
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM

    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1

    # One level-specific fc1 per FPN level used for RoI pooling.
    self.fc1 = lib.nn.ModuleList()
    for i in range(num_levels):
        self.fc1.append(lib.nn.Sequential(
            lib.nn.Linear(dim_in * roi_size**2, hidden_dim),
            lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                             eps=cfg.GROUP_NORM.EPSILON),
            lib.nn.ReLU(inplace=True)))
    self.fc2 = lib.nn.Sequential(
        lib.nn.Linear(hidden_dim, hidden_dim),
        lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON),
        lib.nn.ReLU(inplace=True))

    self._init_weights()
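# --- Illustrative sketch, not part of the head above ---
# Shape walk-through for one level of fc1, with assumed values
# dim_in=256, roi_size=7, hidden_dim=1024 and 32 GN groups (the real
# values come from cfg). Note that nn.GroupNorm accepts (N, C) inputs,
# so it can normalize fully-connected activations directly.
import torch
import torch.nn as nn

fc1_level = nn.Sequential(
    nn.Linear(256 * 7 ** 2, 1024),
    nn.GroupNorm(32, 1024),
    nn.ReLU(inplace=True),
)
roi_feats = torch.randn(8, 256 * 7 * 7)  # 8 RoIs, flattened 7x7 features
print(fc1_level(roi_feats).shape)  # torch.Size([8, 1024])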
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale

    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    module_list = []
    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS - 1):
        module_list.extend([
            lib.nn.Conv2d(dim_in, hidden_dim, 3, 1, 1, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                             eps=cfg.GROUP_NORM.EPSILON),
            lib.nn.ReLU(inplace=True)
        ])
        dim_in = hidden_dim
    self.convs = lib.nn.Sequential(*module_list)

    self.dim_out = fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.fc = lib.nn.Linear(dim_in * roi_size * roi_size, fc_dim)

    # One level-specific first conv block per FPN level used for RoI pooling.
    num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    self.conv1_head = lib.nn.ModuleList()
    for i in range(num_levels):
        self.conv1_head.append(lib.nn.Sequential(
            lib.nn.Conv2d(dim_in, hidden_dim, 3, 1, 1, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                             eps=cfg.GROUP_NORM.EPSILON),
            lib.nn.ReLU(inplace=True)))

    self._init_weights()
def __init__(self, inplanes, outplanes, innerplanes, stride=1, dilation=1,
             group=1, downsample=None):
    super().__init__()
    # In the original ResNet, stride=2 is on the 1x1 conv.
    # In the fb.torch ResNet, stride=2 is on the 3x3 conv.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)
    self.stride = stride

    self.conv1 = lib.nn.Conv2d(
        inplanes, innerplanes, kernel_size=1, stride=str1x1, bias=False)
    self.gn1 = lib.nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                                eps=cfg.GROUP_NORM.EPSILON)
    self.conv2 = lib.nn.Conv2d(
        innerplanes, innerplanes, kernel_size=3, stride=str3x3, bias=False,
        padding=1 * dilation, dilation=dilation, groups=group)
    self.gn2 = lib.nn.GroupNorm(net_utils.get_group_gn(innerplanes), innerplanes,
                                eps=cfg.GROUP_NORM.EPSILON)
    self.conv3 = lib.nn.Conv2d(
        innerplanes, outplanes, kernel_size=1, stride=1, bias=False)
    self.gn3 = lib.nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                                eps=cfg.GROUP_NORM.EPSILON)
    self.downsample = downsample
    self.relu = lib.nn.ReLU(inplace=True)
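# --- Illustrative sketch, not part of the block above ---
# The two stride conventions in isolation: Caffe-style ResNet strides on
# the 1x1 conv, fb.torch-style strides on the 3x3 conv. Either way the
# block downsamples exactly once; striding the 3x3 conv lets the spatial
# convolution see the full-resolution input first.
stride = 2
for stride_1x1 in (True, False):
    str1x1, str3x3 = (stride, 1) if stride_1x1 else (1, stride)
    print(stride_1x1, (str1x1, str3x3))  # True (2, 1) / False (1, 2)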
def basic_gn_shortcut(inplanes, outplanes, stride):
    return lib.nn.Sequential(
        lib.nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=stride,
                      bias=False),
        lib.nn.GroupNorm(net_utils.get_group_gn(outplanes), outplanes,
                         eps=cfg.GROUP_NORM.EPSILON))
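# --- Illustrative sketch, not part of the helper above ---
# Shape check for the projection shortcut with assumed dims (256 -> 512
# channels at stride 2, 32 GN groups): its output matches the main
# branch so the residual addition is valid.
import torch
import torch.nn as nn

shortcut = nn.Sequential(
    nn.Conv2d(256, 512, kernel_size=1, stride=2, bias=False),
    nn.GroupNorm(32, 512),
)
x = torch.randn(1, 256, 56, 56)
print(shortcut(x).shape)  # torch.Size([1, 512, 28, 28])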
def __init__(self, dim_in, roi_xform_func, spatial_scale):
    super().__init__()
    self.dim_in = dim_in
    self.roi_xform = roi_xform_func
    self.spatial_scale = spatial_scale
    self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM

    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    self.fc1 = lib.nn.Sequential(
        lib.nn.Linear(dim_in * roi_size**2, hidden_dim),
        lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON))
    self.fc2 = lib.nn.Sequential(
        lib.nn.Linear(hidden_dim, hidden_dim),
        lib.nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim,
                         eps=cfg.GROUP_NORM.EPSILON))

    self._init_weights()
def basic_gn_stem():
    return lib.nn.Sequential(OrderedDict([
        ('conv1', lib.nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False)),
        ('gn1', lib.nn.GroupNorm(net_utils.get_group_gn(64), 64,
                                 eps=cfg.GROUP_NORM.EPSILON)),
        ('relu', lib.nn.ReLU(inplace=True)),
        ('maxpool', lib.nn.MaxPool2d(kernel_size=3, stride=2, padding=1))]))
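# --- Illustrative sketch, not part of the stem above ---
# Standalone check (input size assumed): the 7x7 stride-2 conv followed
# by the 3x3 stride-2 max pool reduces 224x224 to 56x56, i.e. the stem
# output has spatial scale 1/4.
import torch
import torch.nn as nn

stem = nn.Sequential(
    nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False),
    nn.GroupNorm(32, 64),  # 32 groups assumed; the real count comes from cfg
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
print(stem(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 64, 56, 56])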
def __init__(self, dim_in_top, dim_in_lateral):
    super().__init__()
    self.dim_in_top = dim_in_top
    self.dim_in_lateral = dim_in_lateral
    self.dim_out = dim_in_top
    if cfg.FPN.USE_GN:
        self.conv_lateral = lib.nn.Sequential(
            lib.nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(self.dim_out), self.dim_out,
                             eps=cfg.GROUP_NORM.EPSILON)
        )
    else:
        self.conv_lateral = lib.nn.Conv2d(dim_in_lateral, self.dim_out, 1, 1, 0)

    self._init_weights()
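# --- Illustrative sketch, not part of the module above ---
# The top-down merge this lateral conv feeds, in plain torch (shapes
# assumed): the coarser map is 2x-upsampled and summed element-wise with
# the 1x1-projected lateral feature of the finer level.
import torch
import torch.nn.functional as F

top = torch.randn(1, 256, 16, 16)      # coarser FPN level
lateral = torch.randn(1, 256, 32, 32)  # 1x1-projected backbone feature
merged = F.interpolate(top, scale_factor=2, mode='nearest') + lateral
print(merged.shape)  # torch.Size([1, 256, 32, 32])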
def __init__(self, conv_body_func, fpn_level_info, P2only=False,
             panet_buttomup=False):
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only
    self.panet_buttomup = panet_buttomup

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    self.num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL))
    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []  # a list of scales for FPN outputs

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        self.conv_top = lib.nn.Sequential(
            lib.nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias=False),
            lib.nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps=cfg.GROUP_NORM.EPSILON)
        )
    else:
        self.conv_top = lib.nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
    self.topdown_lateral_modules = lib.nn.ModuleList()
    self.posthoc_modules = lib.nn.ModuleList()

    # For the other levels, add top-down and lateral connections
    for i in range(self.num_backbone_stages - 1):
        self.topdown_lateral_modules.append(
            topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1])
        )

    # Post-hoc scale-specific 3x3 convs
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(lib.nn.Sequential(
                lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False),
                lib.nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                 eps=cfg.GROUP_NORM.EPSILON)
            ))
        else:
            self.posthoc_modules.append(
                lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
            )
        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    # Convs for the PANet bottom-up augmentation path
    if self.panet_buttomup:
        self.panet_buttomup_conv1_modules = lib.nn.ModuleList()
        self.panet_buttomup_conv2_modules = lib.nn.ModuleList()
        for i in range(self.num_backbone_stages - 1):
            if cfg.FPN.USE_GN:
                self.panet_buttomup_conv1_modules.append(lib.nn.Sequential(
                    lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1, bias=True),
                    lib.nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                     eps=cfg.GROUP_NORM.EPSILON),
                    lib.nn.ReLU(inplace=True)
                ))
                self.panet_buttomup_conv2_modules.append(lib.nn.Sequential(
                    lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=True),
                    lib.nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                                     eps=cfg.GROUP_NORM.EPSILON),
                    lib.nn.ReLU(inplace=True)
                ))
            else:
                self.panet_buttomup_conv1_modules.append(
                    lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 2, 1)
                )
                self.panet_buttomup_conv2_modules.append(
                    lib.nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
                )

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper:
        # use max pooling to simulate stride-2 subsampling
        self.maxpool_p6 = lib.nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = lib.nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            self.extra_pyramid_modules.append(
                lib.nn.Conv2d(dim_in, fpn_dim, 3, 2, 1)
            )
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        # use only the finest level
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()

    # Deliberately add conv_body after _init_weights;
    # conv_body has its own _init_weights function.
    self.conv_body = conv_body_func()  # e.g. ResNet
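# --- Illustrative sketch, not part of the FPN above ---
# The P6 trick in isolation (shapes assumed): a 1x1 max pool with
# stride 2 keeps every other activation of P5, halving its resolution
# without adding any parameters.
import torch
import torch.nn as nn

p5 = torch.randn(1, 256, 16, 16)
p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)(p5)
print(p6.shape)  # torch.Size([1, 256, 8, 8])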